def run_training_continuation(self, xgb_params_01, xgb_params_02, xgb_params_03):
    from sklearn.datasets import load_digits
    from sklearn.metrics import mean_squared_error

    # newer sklearn versions require n_class as a keyword argument
    digits_2class = load_digits(n_class=2)
    digits_5class = load_digits(n_class=5)

    X_2class = digits_2class['data']
    y_2class = digits_2class['target']

    X_5class = digits_5class['data']
    y_5class = digits_5class['target']

    dump_svmlight_file(X_2class, y_2class, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dtrain_2class = xgb.DMatrix({username: temp_enc_name})

    dump_svmlight_file(X_5class, y_5class, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dtrain_5class = xgb.DMatrix({username: temp_enc_name})

    gbdt_01 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=10)
    ntrees_01 = len(gbdt_01.get_dump())
    assert ntrees_01 == 10

    gbdt_02 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=0)
    gbdt_02.save_model(HOME_DIR + 'xgb_tc.model')

    # TODO(rishabh): add support for xgb_model
    """
def test_basic(self):
    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

    param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
    # specify validation set to watch performance
    watchlist = [(dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)

    preds = bst.predict(dtrain)[0]

    # TODO(rishabh): support for get_label()
    """
    labels = dtrain.get_label()
    err = sum(1 for i in range(len(preds))
              if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
    # error must be smaller than 10%
    assert err < 0.1

    preds = bst.predict(dtest)[0]
    labels = dtest.get_label()
    err = sum(1 for i in range(len(preds))
              if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
    # error must be smaller than 10%
    assert err < 0.1
    """

    # TODO(rishabh): support for save_binary()
    """
def test_boost_from_prediction(self):
    # Re-construct dtrain here to avoid modification
    margined = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    bst = xgb.train({'tree_method': 'hist'}, margined, 1)
    predt_0 = bst.predict(margined, output_margin=True)

    # TODO(rishabh): implement set_base_margin()
    """
def test_basic_rpc(self):
    channel_addr = "127.0.0.1:50052"
    xgb.init_client(user_name=username,
                    sym_key_file=sym_key_file,
                    priv_key_file=priv_key_file,
                    cert_file=cert_file,
                    remote_addr=channel_addr)
    xgb.attest(verify=False)

    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

    # Set training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "5",
        "verbosity": "0"
    }

    num_rounds = 2
    booster = xgb.train(params, dtrain, num_rounds)

    predictions, num_preds = booster.predict(dtest, decrypt=False)
    preds = booster.decrypt_predictions(predictions, num_preds)

    ten_preds = preds[:10]
    labels = [0, 1, 0, 0, 0, 0, 1, 0, 1, 0]
    err = sum(1 for i in range(len(ten_preds))
              if int(ten_preds[i] > 0.5) != labels[i]) / float(len(ten_preds))
    # error must be smaller than 10%
    assert err < 0.1
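# The thresholded error-rate computation above recurs in several tests in this
# file. A minimal helper capturing the pattern is sketched below; the name
# `classification_error` is ours (not part of the suite), and it assumes
# plaintext 0/1 labels and already-decrypted probability predictions.
def classification_error(preds, labels, threshold=0.5):
    # fraction of predictions whose thresholded class disagrees with the label
    assert len(preds) == len(labels)
    return sum(1 for p, l in zip(preds, labels)
               if int(p > threshold) != l) / float(len(preds))

# usage (equivalent to the assertion above):
#   assert classification_error(ten_preds, labels) < 0.1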
def build_model(self, max_depth, num_round):
    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    param = {'max_depth': max_depth, 'objective': 'binary:logistic', 'verbosity': 1}
    bst = xgb.train(param, dtrain, num_round)
    return bst
def test_dart(self):
    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
    param = {
        'max_depth': 5,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'booster': 'dart',
        'verbosity': 1
    }
    # specify validation set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)
    # this is prediction
    preds = bst.predict(dtest, ntree_limit=num_round)[0]

    # TODO(rishabh): implement get_label()
    """
    labels = dtest.get_label()
    err = sum(1 for i in range(len(preds))
              if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
    # error must be smaller than 10%
    assert err < 0.1
    """

    # TODO(rishabh): implement save_binary()
    """
    # save dmatrix into binary buffer
    dtest.save_binary('dtest.buffer')
    model_path = 'xgb.model.dart'
    # save model
    bst.save_model(model_path)
    # load model and data in
    bst2 = xgb.Booster(params=param, model_file='xgb.model.dart')
    dtest2 = xgb.DMatrix('dtest.buffer')
    preds2 = bst2.predict(dtest2, ntree_limit=num_round)[0]
    # assert they are the same
    assert np.sum(np.abs(preds2 - preds)) == 0
    """

    def my_logloss(preds, dtrain):
        return
        # TODO(rishabh): implement get_label()
        """
        labels = dtrain.get_label()
        return 'logloss', np.sum(np.log(np.where(labels, preds, 1 - preds)))
        """

    # check whether custom evaluation metrics work
    # TODO: implement feval (allow definition of a loss function?)
    """
    bst = xgb.train(param, dtrain, num_round, watchlist, feval=my_logloss)
    preds3 = bst.predict(dtest, ntree_limit=num_round)[0]
    assert all(preds3 == preds)
    """

    # TODO(rishabh): implement get_label()
    """
def test_eval_metrics(self):
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.datasets import load_digits

    # newer sklearn versions require n_class as a keyword argument
    digits = load_digits(n_class=2)
    X = digits['data']
    y = digits['target']
    Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)

    dump_svmlight_file(Xt, yt, temp_name_t)
    xgb.encrypt_file(temp_name_t, temp_enc_name_t, sym_key_file)
    dump_svmlight_file(Xv, yv, temp_name_v)
    xgb.encrypt_file(temp_name_v, temp_enc_name_v, sym_key_file)

    dtrain = xgb.DMatrix({username: temp_enc_name_t})
    dvalid = xgb.DMatrix({username: temp_enc_name_v})

    watchlist = [(dtrain, 'train'), (dvalid, 'val')]

    gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)
    gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)
    gbdt_03 = xgb.train(self.xgb_params_03, dtrain, num_boost_round=10)
    assert all(gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0])
    assert all(gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0])

    # TODO(rishabh): implement early_stopping_rounds
    """
    gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
                        early_stopping_rounds=2)
    gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
                        early_stopping_rounds=2)
    gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
                        early_stopping_rounds=2)
    gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
                        early_stopping_rounds=2)
    assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
    assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
    assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
    """

    # TODO(rishabh): implement early_stopping_rounds and feval
    """
def test_monotone_constraints_for_exact_tree_method(self):
    # first check monotonicity for the 'exact' tree method
    params_for_constrained_exact_method = {
        'tree_method': 'exact',
        'verbosity': 1,
        'monotone_constraints': '(1, -1)'
    }
    constrained_exact_method = xgb.train(params_for_constrained_exact_method,
                                         training_dset)
    assert is_correctly_constrained(constrained_exact_method)
def test_feature_names_validation(self):
    X = np.random.random((10, 3))
    y = np.random.randint(2, size=(10,))

    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dm1 = xgb.DMatrix({username: temp_enc_name})
    dm2 = xgb.DMatrix({username: temp_enc_name}, feature_names=("a", "b", "c"))

    bst = xgb.train([], dm1)
    bst.predict(dm1)  # success
    self.assertRaises(ValueError, bst.predict, dm2)
    bst.predict(dm1)  # success

    bst = xgb.train([], dm2)
    bst.predict(dm2)  # success
    self.assertRaises(ValueError, bst.predict, dm1)
    bst.predict(dm2)  # success
def test_monotone_constraints_for_depthwise_hist_tree_method(self):
    # next check monotonicity for the 'hist' tree method
    params_for_constrained_hist_method = {
        'tree_method': 'hist',
        'verbosity': 1,
        'monotone_constraints': '(1, -1)'
    }
    constrained_hist_method = xgb.train(params_for_constrained_hist_method,
                                        training_dset)
    assert is_correctly_constrained(constrained_hist_method)
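# `training_dset` and `is_correctly_constrained` are defined elsewhere in this
# suite. For reference, one plausible implementation of the check is sketched
# below; this is our illustration, not the suite's actual helper, and it
# assumes a two-feature model trained with monotone_constraints='(1, -1)',
# i.e. non-decreasing in feature 0 and non-increasing in feature 1.
def is_correctly_constrained_sketch(booster, n_points=100):
    sweep = np.linspace(0.01, 1.0, n_points)
    fixed = np.full(n_points, 0.5)

    def predict_on(matrix):
        # follow the file's usual dump -> encrypt -> DMatrix pattern
        dump_svmlight_file(matrix, np.zeros(n_points), temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        return booster.predict(xgb.DMatrix({username: temp_enc_name}))[0]

    # sweep feature 0 upward with feature 1 held fixed: predictions must not fall
    preds_f0 = predict_on(np.column_stack((sweep, fixed)))
    # sweep feature 1 upward with feature 0 held fixed: predictions must not rise
    preds_f1 = predict_on(np.column_stack((fixed, sweep)))

    return bool(np.all(np.diff(preds_f0) >= 0) and np.all(np.diff(preds_f1) <= 0))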
def run(channel_addr, sym_key_file, priv_key_file, cert_file):
    xgb.init_client(user_name=username,
                    client_list=["user1", username],
                    sym_key_file=sym_key_file,
                    priv_key_file=priv_key_file,
                    cert_file=cert_file,
                    remote_addr=channel_addr)
    xgb.rabit.init()

    # Remote attestation
    print("Remote attestation")
    # Note: Simulation mode does not support attestation;
    # pass in `verify=False` to attest()
    xgb.attest()
    print("Report successfully verified")

    print("Load training matrices")
    dtrain = xgb.DMatrix({
        "user1": HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c1_train.enc",
        username: HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c2_train.enc"
    }, encrypted=True)

    print("Creating test matrix")
    dtest1 = xgb.DMatrix({"user1": HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c1_test.enc"})
    dtest2 = xgb.DMatrix({username: HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c2_test.enc"})

    print("Beginning Training")

    # Set training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "0"
    }

    # Train and evaluate
    num_rounds = 10
    print("Training...")
    booster = xgb.train(params, dtrain, num_rounds)

    # Enable the other party to get its predictions
    _, _ = booster.predict(dtest1, decrypt=False)

    # Get our predictions
    predictions, num_preds = booster.predict(dtest2, decrypt=False)
    # Decrypt predictions
    print("Predictions: ", booster.decrypt_predictions(predictions, num_preds)[:10])

    # Get fscores of model
    print("\nModel Feature Importance: ")
    print(booster.get_fscore())

    xgb.rabit.finalize()
def test_multiclass(self):
    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

    param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
    # specify validation set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)
    # this is prediction
    preds = bst.predict(dtest)[0]

    # TODO(rishabh): support for get_label(), save_binary()
    """
def fn(max_depth, num_rounds):
    # train
    params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
    bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

    # predict
    preds = bst.predict(dtest)[0]
    contribs = bst.predict(dtest, pred_contribs=True)[0]

    # result should be (number of rows) x (number of features + BIAS)
    assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)

    # sum of contributions should be same as predictions
    np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)
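# The additivity checked above (per-feature contributions plus the bias column
# summing to the margin prediction) is the "efficiency" property of Shapley
# values. A self-contained brute-force illustration on a toy two-feature AND
# function follows; it is purely illustrative and independent of the XGBoost
# API.
def _toy_shap_demo():
    import itertools
    import math

    def value(subset):
        # model output when only the features in `subset` are "present";
        # both features must be present for the AND to fire
        return 1.0 if {0, 1} <= set(subset) else 0.0

    features = [0, 1]
    phi = {}
    for i in features:
        others = [f for f in features if f != i]
        total = 0.0
        for r in range(len(others) + 1):
            for s in itertools.combinations(others, r):
                # classic Shapley weight |S|! (M - |S| - 1)! / M!
                weight = (math.factorial(len(s)) *
                          math.factorial(len(features) - len(s) - 1) /
                          math.factorial(len(features)))
                total += weight * (value(s + (i,)) - value(s))
        phi[i] = total

    # efficiency: contributions plus the empty-set baseline recover the output
    assert abs(sum(phi.values()) + value(()) - value((0, 1))) < 1e-12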
def run_interaction_constraints(self, tree_method):
    x1 = np.random.normal(loc=1.0, scale=1.0, size=1000)
    x2 = np.random.normal(loc=1.0, scale=1.0, size=1000)
    x3 = np.random.choice([1, 2, 3], size=1000, replace=True)
    y = x1 + x2 + x3 + x1 * x2 * x3 \
        + np.random.normal(loc=0.001, scale=1.0, size=1000) + 3 * np.sin(x1)
    X = np.column_stack((x1, x2, x3))

    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dtrain = xgb.DMatrix({username: temp_enc_name})

    params = {
        'max_depth': 3,
        'eta': 0.1,
        'nthread': 2,
        'interaction_constraints': '[[0, 1]]',
        'tree_method': tree_method
    }
    num_boost_round = 12

    # Fit a model that only allows interaction between x1 and x2
    bst = xgb.train(params, dtrain, num_boost_round,
                    evals=[(dtrain, 'train')])

    # Set all observations to have the same x3 values then increment
    # by the same amount
    def f(x):
        tX = np.column_stack((x1, x2, np.repeat(x, 1000)))
        dump_svmlight_file(tX, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        tmat = xgb.DMatrix({username: temp_enc_name})
        return bst.predict(tmat)[0]

    preds = [f(x) for x in [1, 2, 3]]

    # Check incrementing x3 has the same effect on all observations
    # since x3 is constrained to be independent of x1 and x2
    # and all observations start off from the same x3 value
    diff1 = preds[1] - preds[0]
    assert np.all(np.abs(diff1 - diff1[0]) < 1e-4)
    diff2 = preds[2] - preds[1]
    assert np.all(np.abs(diff2 - diff2[0]) < 1e-4)
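# `interaction_constraints` takes a JSON-style list of feature-index groups:
# features may only share a tree path with members of their own group. The
# test above uses '[[0, 1]]', so x3 (index 2) can never interact with x1 or
# x2. A sketch with two permitted groups (indices illustrative only):
params_two_groups = {
    'tree_method': 'hist',
    # indices 0 and 1 may interact; indices 2, 3 and 4 may interact;
    # no interactions are allowed across the two groups
    'interaction_constraints': '[[0, 1], [2, 3, 4]]'
}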
def run_model_pickling(self, xgb_params):
    X, y = generate_data()
    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dtrain = xgb.DMatrix({username: temp_enc_name})
    bst = xgb.train(xgb_params, dtrain)

    dump_0 = bst.get_dump(dump_format='json')
    assert dump_0

    filename = 'model.pkl'

    # TODO: support pickling
    """
def test_glm(self):
    param = {
        'verbosity': 0,
        'objective': 'binary:logistic',
        'booster': 'gblinear',
        'alpha': 0.0001,
        'lambda': 1,
        'nthread': 1
    }
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 4
    bst = xgb.train(param, dtrain, num_round, watchlist)
    assert isinstance(bst, xgb.core.Booster)
    preds = bst.predict(dtest)[0]

    # TODO(rishabh): implement get_label()
    """
def test_pruner(self):
    # import the submodule explicitly; `import sklearn` alone does not
    # guarantee that sklearn.datasets is available
    import sklearn.datasets

    params = {'tree_method': 'exact'}
    cancer = sklearn.datasets.load_breast_cancer()
    X = cancer['data']
    y = cancer['target']

    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dtrain = xgb.DMatrix({username: temp_enc_name})

    booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
    grown = str(booster.get_dump())

    params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'}

    # TODO(rishabh): add support for xgb_model
    """
def test_alpha_and_lambda(self):
    params = {
        'tree_method': 'exact',
        'verbosity': 1,
        'objective': 'reg:squarederror',
        'eta': 1,
        'lambda': 1,
        'alpha': 0.1
    }
    model = xgb.train(params, train_data, 1)
    # predict() returns (predictions, num_preds); take the array,
    # matching the pattern used in test_lambda below
    preds = model.predict(train_data)[0]

    # Default prediction (with no trees) is 0.5
    # sum_grad = (0.5 - 1.0)
    # sum_hess = 1.0
    # 0.7 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / (sum_hess + lambda)
    # (see the arithmetic sketch after test_lambda below)
    assert_approx_equal(preds[0], 0.7)
def test_lambda(self):
    # train_data = xgb.DMatrix({username: temp_enc_name})
    params = {
        'tree_method': 'exact',
        'verbosity': 0,
        'objective': 'reg:squarederror',
        'eta': 1,
        'lambda': 1,
        'alpha': 0
    }
    model = xgb.train(params, train_data, 1)
    preds = model.predict(train_data)[0]
    print(preds)

    # Default prediction (with no trees) is 0.5
    # sum_grad = (0.5 - 1.0)
    # sum_hess = 1.0
    # 0.75 = 0.5 - sum_grad / (sum_hess + lambda)
    assert_approx_equal(preds[0], 0.75)
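# A quick plain-Python check of the closed-form leaf weights asserted in the
# two tests above. This is our own sketch of the standard XGBoost leaf-weight
# formula for squared error with a single example (label 1.0, base score 0.5),
# not code from the suite.
def _check_leaf_weight_arithmetic():
    base_score = 0.5
    G = base_score - 1.0   # sum of gradients: prediction - label = -0.5
    H = 1.0                # sum of hessians for squared error
    lam = 1.0

    # test_lambda: L2 only -> leaf weight w = -G / (H + lambda)
    assert abs((base_score - G / (H + lam)) - 0.75) < 1e-12

    # test_alpha_and_lambda: L1 first shrinks |G| by alpha, then the L2 step
    alpha = 0.1
    G_shrunk = G - alpha * (1 if G > 0 else -1)   # -0.5 + 0.1 = -0.4
    assert abs((base_score - G_shrunk / (H + lam)) - 0.7) < 1e-12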
def test_dump(self):
    data = np.random.randn(100, 2)
    target = np.array([0, 1] * 50)
    dump_svmlight_file(data, target, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

    features = ['Feature1', 'Feature2']
    dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'eta': 0.3,
        'max_depth': 1
    }

    bst = xgb.train(params, dm, num_boost_round=1)

    # number of feature importances should == number of features
    dump1 = bst.get_dump()
    self.assertEqual(len(dump1), 1, "Expected only 1 tree to be dumped.")
    self.assertEqual(len(dump1[0].splitlines()), 3,
                     "Expected 1 root and 2 leaves - 3 lines in dump.")

    dump2 = bst.get_dump(with_stats=True)
    self.assertEqual(dump2[0].count('\n'), 3,
                     "Expected 1 root and 2 leaves - 3 lines in dump.")
    self.assertGreater(dump2[0].find('\n'), dump1[0].find('\n'),
                       "Expected more info when with_stats=True is given.")

    dump3 = bst.get_dump(dump_format="json")
    dump3j = json.loads(dump3[0])
    self.assertEqual(dump3j["nodeid"], 0, "Expected the root node on top.")

    dump4 = bst.get_dump(dump_format="json", with_stats=True)
    dump4j = json.loads(dump4[0])
    self.assertIn("gain", dump4j, "Expected 'gain' to be dumped in JSON.")
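# For reference, the text-format dump asserted on above (one root and two
# leaves, hence three lines) looks roughly like this; values are illustrative
# only:
#
#   0:[Feature1<0.123] yes=1,no=2,missing=1
#       1:leaf=-0.4
#       2:leaf=0.4
#
# With with_stats=True, split lines additionally carry gain= and cover=
# fields, and leaf lines carry cover=.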
def test_feature_names(self):
    data = np.random.randn(100, 5)
    target = np.array([0, 1] * 50)
    dump_svmlight_file(data, target, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

    features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']
    dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
    assert dm.feature_names == features
    assert dm.num_row() == 100
    assert dm.num_col() == 5

    params = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'eta': 0.3,
        'num_class': 3
    }

    bst = xgb.train(params, dm, num_boost_round=10)
    scores = bst.get_fscore()
    assert list(sorted(k for k in scores)) == features

    dummy_X = np.random.randn(5, 5)
    dummy_Y = np.random.randn(5)
    dump_svmlight_file(dummy_X, dummy_Y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
    bst.predict(dm)[0]

    # a different feature name must raise an error
    dm = xgb.DMatrix({username: temp_enc_name}, feature_names=list('abcde'))
    self.assertRaises(ValueError, bst.predict, dm)
print("Beginning Training") # Set training parameters params = { "tree_method": "hist", "n_gpus": "0", "objective": "binary:logistic", "min_child_weight": "1", "gamma": "0.1", "max_depth": "3", "verbosity": "1" } # Train and evaluate num_rounds = 5 booster = xgb.train(params, dtrain, num_rounds, evals=[(dtrain, "train"), (dtest, "test")]) booster.save_model(DIR + "/demo_model.model") # Get encrypted predictions print("\n\nModel Predictions: ") predictions, num_preds = booster.predict(dtest, decrypt=False) # Decrypt predictions print(booster.decrypt_predictions(predictions, num_preds)[:20]) xgb.rabit.finalize()
def test_feature_importances(self):
    data = np.random.randn(100, 5)
    target = np.array([0, 1] * 50)
    dump_svmlight_file(data, target, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

    features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']
    dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
    params = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'eta': 0.3,
        'num_class': 3
    }

    bst = xgb.train(params, dm, num_boost_round=10)

    # number of feature importances should == number of features
    scores1 = bst.get_score()
    scores2 = bst.get_score(importance_type='weight')
    scores3 = bst.get_score(importance_type='cover')
    scores4 = bst.get_score(importance_type='gain')
    scores5 = bst.get_score(importance_type='total_cover')
    scores6 = bst.get_score(importance_type='total_gain')
    assert len(scores1) == len(features)
    assert len(scores2) == len(features)
    assert len(scores3) == len(features)
    assert len(scores4) == len(features)
    assert len(scores5) == len(features)
    assert len(scores6) == len(features)

    # check backwards compatibility of get_fscore
    fscores = bst.get_fscore()
    assert scores1 == fscores

    dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
    dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

    def fn(max_depth, num_rounds):
        # train
        params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
        bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

        # predict
        preds = bst.predict(dtest)[0]
        contribs = bst.predict(dtest, pred_contribs=True)[0]

        # result should be (number of rows) x (number of features + BIAS)
        assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)

        # sum of contributions should be same as predictions
        np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)

    # for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
    #     yield fn, max_depth, num_rounds

    # check that we get the right SHAP values for a basic AND example
    # (https://arxiv.org/abs/1706.06060)
    X = np.zeros((4, 2))
    X[0, :] = 1
    X[1, 0] = 1
    X[2, 1] = 1
    y = np.zeros(4)
    y[0] = 1
    param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    bst = xgb.train(param, xgb.DMatrix({username: temp_enc_name}), 1)
    dump_svmlight_file(X[0:1, :], np.zeros(1), temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
    out = bst.predict(xgb.DMatrix({username: temp_enc_name}),
                      pred_contribs=True)[0]

    # TODO(rishabh): enable pred_contribs
    """
    assert out[0, 0] == 0.375
    assert out[0, 1] == 0.375
    assert out[0, 2] == 0.25
    """

    def parse_model(model):
        trees = []
        r_exp = r"([0-9]+):\[f([0-9]+)<([0-9\.e-]+)\] yes=([0-9]+),no=([0-9]+).*cover=([0-9e\.]+)"
        r_exp_leaf = r"([0-9]+):leaf=([0-9\.e-]+),cover=([0-9e\.]+)"
        for tree in model.get_dump(with_stats=True):
            lines = list(tree.splitlines())
            trees.append([None for i in range(len(lines))])
            for line in lines:
                match = re.search(r_exp, line)
                if match is not None:
                    ind = int(match.group(1))
                    while ind >= len(trees[-1]):
                        trees[-1].append(None)
                    trees[-1][ind] = {
                        "yes_ind": int(match.group(4)),
                        "no_ind": int(match.group(5)),
                        "value": None,
                        "threshold": float(match.group(3)),
                        "feature_index": int(match.group(2)),
                        "cover": float(match.group(6))
                    }
                else:
                    match = re.search(r_exp_leaf, line)
                    ind = int(match.group(1))
                    while ind >= len(trees[-1]):
                        trees[-1].append(None)
                    trees[-1][ind] = {
                        "value": float(match.group(2)),
                        "cover": float(match.group(3))
                    }
        return trees

    def exp_value_rec(tree, z, x, i=0):
        if tree[i]["value"] is not None:
            return tree[i]["value"]
        else:
            ind = tree[i]["feature_index"]
            if z[ind] == 1:
                if x[ind] < tree[i]["threshold"]:
                    return exp_value_rec(tree, z, x, tree[i]["yes_ind"])
                else:
                    return exp_value_rec(tree, z, x, tree[i]["no_ind"])
            else:
                r_yes = tree[tree[i]["yes_ind"]]["cover"] / tree[i]["cover"]
                out = exp_value_rec(tree, z, x, tree[i]["yes_ind"])
                val = out * r_yes

                r_no = tree[tree[i]["no_ind"]]["cover"] / tree[i]["cover"]
                out = exp_value_rec(tree, z, x, tree[i]["no_ind"])
                val += out * r_no
                return val

    def exp_value(trees, z, x):
        return np.sum([exp_value_rec(tree, z, x) for tree in trees])

    def all_subsets(ss):
        return itertools.chain(*map(lambda x: itertools.combinations(ss, x),
                                    range(0, len(ss) + 1)))

    def shap_value(trees, x, i, cond=None, cond_value=None):
        M = len(x)
        z = np.zeros(M)
        other_inds = list(set(range(M)) - set([i]))
        if cond is not None:
            other_inds = list(set(other_inds) - set([cond]))
            z[cond] = cond_value
            M -= 1
        total = 0.0
        for subset in all_subsets(other_inds):
            if len(subset) > 0:
                z[list(subset)] = 1
            v1 = exp_value(trees, z, x)
            z[i] = 1
            v2 = exp_value(trees, z, x)
            total += (v2 - v1) / (scipy.special.binom(M - 1, len(subset)) * M)
            z[i] = 0
            z[list(subset)] = 0
        return total

    def shap_values(trees, x):
        vals = [shap_value(trees, x, i) for i in range(len(x))]
        vals.append(exp_value(trees, np.zeros(len(x)), x))
        return np.array(vals)

    def interaction_values(trees, x):
        M = len(x)
        out = np.zeros((M + 1, M + 1))
        for i in range(len(x)):
            for j in range(len(x)):
                if i != j:
                    out[i, j] = interaction_value(trees, x, i, j) / 2
        svals = shap_values(trees, x)
        main_effects = svals - out.sum(1)
        out[np.diag_indices_from(out)] = main_effects
        return out

    def interaction_value(trees, x, i, j):
        M = len(x)
        z = np.zeros(M)
        other_inds = list(set(range(M)) - set([i, j]))
        total = 0.0
        for subset in all_subsets(other_inds):
            if len(subset) > 0:
                z[list(subset)] = 1
            v00 = exp_value(trees, z, x)
            z[i] = 1
            v10 = exp_value(trees, z, x)
            z[j] = 1
            v11 = exp_value(trees, z, x)
            z[i] = 0
            v01 = exp_value(trees, z, x)
            z[j] = 0
            total += (v11 - v01 - v10 + v00) / (scipy.special.binom(M - 2, len(subset)) * (M - 1))
            z[list(subset)] = 0
        return total

    # test a simple AND function
    M = 2
    N = 4
    X = np.zeros((N, M))
    X[0, :] = 1
    X[1, 0] = 1
    X[2, 1] = 1
    y = np.zeros(N)
    y[0] = 1
    param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}

    # TODO(rishabh): enable pred_contribs
    """
def run(channel_addr, sym_key_file, priv_key_file, cert_file):
    # Remote attestation
    print("Remote attestation")

    xgb.init_client(user_name=username,
                    sym_key_file=sym_key_file,
                    priv_key_file=priv_key_file,
                    cert_file=cert_file,
                    remote_addr=channel_addr)

    # Note: Simulation mode does not support attestation;
    # pass in `verify=False` to attest()
    xgb.attest()
    print("Report successfully verified")

    print("Creating training matrix")
    dtrain = xgb.DMatrix(
        {username: HOME_DIR + "demo/python/remote-control/data/train.enc"})
    if not dtrain:
        print("Error creating dtrain")
        return
    print("dtrain: " + dtrain.handle.value.decode("utf-8"))

    print("Creating test matrix")
    dtest = xgb.DMatrix(
        {username: HOME_DIR + "demo/python/remote-control/data/test.enc"})
    if not dtest:
        print("Error creating dtest")
        return
    print("dtest: " + dtest.handle.value.decode("utf-8"))

    print("Beginning Training")

    # Set training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "0"
    }

    # Train and evaluate
    num_rounds = 5
    print("Training...")
    booster = xgb.train(params, dtrain, num_rounds)
    print("booster: " + booster.handle.value.decode("utf-8"))
    booster.save_model(HOME_DIR + "demo/python/remote-control/client/modelfile.model")

    # Get encrypted predictions
    print("\nModel Predictions: ")
    predictions, num_preds = booster.predict(dtest, decrypt=False)

    # Decrypt predictions
    print(booster.decrypt_predictions(predictions, num_preds))

    # Get fscores of model
    print("\nModel Feature Importance: ")
    print(booster.get_fscore())