def genTree(node, sList=None):
    """Build a binary treelib ``Tree`` from a root and a list of splits.

    Parameters
    ----------
    node : pandas.DataFrame or Node
        A DataFrame becomes the data of a new root with identifier 0.
        A ``Node`` is deep-copied, its ``fpointer``/``bpointer`` are reset
        to empty lists, and the copy is used as root under its own id.
    sList : iterable of (pid, x, s), optional
        Splits applied in order.  ``pid`` is the id of the node to split,
        ``x`` the positional column index (``df.iloc[:, x]``) and ``s`` the
        split: a set of categories for dtype kinds ``'O'``/``'u'`` or a
        threshold (``<= s`` goes left) for kinds ``'i'``/``'f'``.  A parent
        must appear before its children because ``tree[pid]`` has to exist.

    Returns
    -------
    Tree
        Children of ``pid`` get ids ``2*pid + 1`` and ``2*pid + 2``.  Every
        split node gets ``tag``, ``S``, ``var`` and ``split``; every child
        gets ``var = None`` and ``xvar``.  ``tree.w2`` lists the split nodes
        none of whose children were split afterwards.

    Raises
    ------
    IndexError
        If a categorical ``s`` is not a proper subset of the observed
        values, or a child would hold fewer than ``minObs`` rows.  Callers
        in this file catch ``IndexError`` to reject a proposal.
    TypeError
        If ``node`` is neither a DataFrame nor a ``Node``, or the split
        column has a dtype kind other than ``O``/``u``/``i``/``f``.

    Reads the module-level names ``DataTypes`` and ``minObs``.
    """
    splits = () if sList is None else sList
    tree = Tree()
    if isinstance(node, pd.core.frame.DataFrame):
        root = tree.create_node(tag='0', identifier=0, data=node)
        root.xvar = get_xvar(node)
        # Every other unsplit node carries var=None; give a fresh root the
        # same attribute so `n.var` is readable even when nothing is split.
        root.var = None
    elif isinstance(node, Node):
        root = deepcopy(node)
        # Detach the copy from the tree it came from.
        root.fpointer = []
        root.bpointer = []
        tree.add_node(root)
    else:
        raise TypeError(
            'node must be a DataFrame or a Node, got '
            + type(node).__name__)

    w2 = []
    for pid, x, s in splits:
        pNode = tree[pid]
        df = pNode.data
        dtype = DataTypes[x]
        column = df.iloc[:, x]
        xVal = set(column)
        if dtype in ('O', 'u'):
            # Categorical: s must leave at least one category on the right.
            if not (s < xVal):
                raise IndexError('invalid category split')
            S = powerset(xVal)
            idx = column.isin(s)
        elif dtype in ('i', 'f'):
            # Numeric: the largest value is not a usable threshold.
            S = sorted(xVal)[:-1]
            idx = column <= s
        else:
            # Previously fell through and used an unbound/stale `idx`.
            raise TypeError(
                'unsupported dtype kind ' + repr(dtype)
                + ' for column ' + str(x))

        childDF = [df.loc[idx], df.loc[~idx]]
        cid = array([pid, pid]) * 2 + array([1, 2])
        for child_id, child_df in zip(cid, childDF):
            if len(child_df) < minObs:
                raise IndexError('no data in leaf')
            child = tree.create_node(
                str(child_id), child_id, parent=pid, data=child_df)
            child.var = None
            child.xvar = get_xvar(child_df)

        pNode.tag = '-'.join(str(part) for part in (pid, x, s))
        pNode.S = S
        pNode.var = x
        pNode.split = s

        # The parent of pid now has a split child, so it leaves w2;
        # pid itself currently has two unsplit children, so it enters.
        gpid = pNode.bpointer
        if gpid in w2:
            w2.remove(gpid)
        w2.append(pid)

    tree.w2 = w2
    return tree
def swap(tree):
    """Propose swapping the split rule of a random split node with its parent's.

    A non-root node with ``var`` set is picked at random.  Its
    ``(var, split)`` pair is exchanged with that of its parent, the
    parent's subtree is regrown with ``genTree`` from the modified rule
    list, and the proposal is accepted when ``random.uniform(0, 1) < r``
    with ``r = rLike * rStruct``.

    Parameters
    ----------
    tree : Tree
        Current tree; it is not modified.  Must carry ``R`` and ``w2``.

    Returns
    -------
    Tree
        The new tree (a deep copy with the regrown subtree pasted in, or
        the regrown subtree itself when the parent is node 0) on
        acceptance.  Otherwise the original ``tree``: when fewer than two
        nodes are split, when ``genTree`` raises ``IndexError``, or when
        the proposal is rejected.

    Prints a progress line using the module-level names ``mi`` and ``t``
    (presumably tree index and iteration - confirm against the driver).
    """
    internalNodes = [n for n in tree.all_nodes_itr() if n.var is not None]
    # A swap needs a split parent and a split child.  With zero split nodes
    # the old `== 1` test fell through and `remove(tree[0])` raised
    # ValueError, so treat "none" the same as "only one".
    if len(internalNodes) <= 1:
        return tree
    internalNodes.remove(tree[0])  # node 0 has no parent to swap with

    cNode = random.choice(internalNodes)
    tagc = (cNode.identifier, cNode.var, cNode.split)
    pid = cNode.bpointer

    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    tags = recurTag(sub, pid)
    tagp = tags[0]
    # Exchange the rules but keep each node's identifier.
    tags[tags.index(tagc)] = (tagc[0], tagp[1], tagp[2])
    tags[0] = (tagp[0], tagc[1], tagc[2])

    string = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        # genTree signals an invalid split or an under-populated child.
        print(string + 'unswappable')
        return tree

    # No transition term is applied here (the original note read
    # "rTransit = 1"); only likelihood and structure ratios enter r.
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rStruct
    print(string + f'{r.round(4)}')

    if random.uniform(0, 1) < r:
        if pid > 0:
            gpid = tree[pid].bpointer
            tree1.paste(gpid, sub1)
            # Keep the children in ascending id order after re-attaching.
            tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
        else:
            # The regrown subtree is rooted at node 0: it is the whole tree.
            tree1 = sub1
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
def change(tree):
    """Propose a new split rule for one randomly chosen node of ``tree``.

    Candidate nodes come from ``nidValid`` and their per-variable option
    lists from ``getChoice``; only nodes with more than one option in total
    are eligible.  The node's current split value is removed from its own
    variable's list, then a variable and a value are drawn at random.  The
    option lists are indexed one below the variable number (``x0 - 1``,
    ``x += 1``) - presumably because column 0 is not a predictor; confirm.

    The subtree below the node is regrown with ``genTree`` and accepted
    when ``random.uniform(0, 1) < r`` with ``r = rLike * rTransit * rStruct``.

    Returns the new tree on acceptance; otherwise the original ``tree``
    (also when ``genTree`` raises ``IndexError``).  ``tree`` itself is not
    modified.  Prints a progress line using module-level ``mi`` and ``t``.

    NOTE(review): ``random.choice`` raises ``IndexError`` if no node is
    eligible, and ``rTransit`` divides by zero if the reverse counts are 0.
    """

    def _n_options(per_var):
        # Total number of candidate split values over all variables.
        return sum([len(vals) for vals in per_var])

    # ---- forward proposal: node, then variable, then value ----------------
    valid_ids = nidValid(tree)
    per_node = [getChoice(tree, n) for n in valid_ids]
    choiceDic = {}
    for node_id, opts in zip(valid_ids, per_node):
        if _n_options(opts) > 1:
            choiceDic[node_id] = opts
    eligible = list(choiceDic)
    nid = random.choice(eligible)

    target = tree[nid]
    p = target.data.shape[1]
    x0 = target.var
    s0 = target.split

    node_opts = choiceDic[nid]
    current_var_opts = node_opts[x0 - 1]
    if s0 in current_var_opts:
        # The existing rule must not be proposed again.
        current_var_opts.remove(s0)

    usable_vars = [i for i in range(p - 1) if len(node_opts[i]) > 0]
    picked = random.choice(usable_vars)
    value_pool = node_opts[picked]
    x = picked + 1
    s = random.choice(value_pool)

    # ---- regrow the subtree under the new rule ----------------------------
    tree1 = Tree(tree, deep=True)
    pid = tree1[nid].bpointer
    sub = tree1.remove_subtree(nid)
    tags = recurTag(sub, nid)
    tags[0] = (nid, x, s)
    try:
        sub1 = genTree(sub[nid], tags)
    except IndexError:
        print(f'{mi} change {t}: {tags[0]}; unchangable')
        return tree

    if pid is None:
        # The changed node was the root, so the regrown subtree is the tree.
        tree1 = sub1
    else:
        tree1.paste(pid, sub1)
        tree1[pid].fpointer = sorted(tree1[pid].fpointer)

    # ---- reverse proposal counts ------------------------------------------
    valid_after = set(nidValid(tree1))
    eligible_before = set(eligible)
    still_eligible = valid_after & eligible_before
    extra = valid_after - eligible_before
    newly_eligible = [
        n for n in extra if _n_options(getChoice(tree1, n)) > 1
    ]
    n_nodes_after = len(still_eligible) + len(newly_eligible)

    choices31 = getChoice(tree1, nid, x0)[x0 - 1]
    n31 = len(choices31)
    same_var = sub1[nid].var == sub[nid].var
    if same_var and (s0 in choices31):
        n31 -= 1

    rTransit = len(eligible_before) * len(value_pool) / (n_nodes_after * n31)
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rTransit * rStruct
    print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}')

    # ---- accept / reject --------------------------------------------------
    if not (random.uniform(0, 1) < r):
        return tree
    tree1.w2 = tree.w2
    tree1.R = tree.R
    tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
    tree1.show()
    return tree1
from scipy import stats gg = gamma.cdf(lamda, ig1, scale=1 / ig2) g = stats.invgamma.cdf(var, ig1, scale=ig2) mumu = (y.min() + y.max()) * 0.5 / m sigma_mu = (y.max() - m * mumu) / (k * sqrt(m)) var_mu = sigma_mu**2 tau = 1 / var_mu taumu = tau * mumu DataTypes = df0.dtypes.map(lambda x: x.kind) #%% tree = Tree() root = tree.create_node('0', 0, data=df0) root.xvar = get_xvar(df0) root.var = None tree.w2 = [] tree.leaf = [0] ProbDefault = array([2.5, 2.5, 4]).cumsum() / 9 ## tree = trueTree T = 1250 burn = 250 trees = [deepcopy(tree) for i in range(m)] MM = pd.DataFrame(index=df0.index, columns=range(m), data=mumu) Yhat = zeros((n0, T)) Depth_mu = zeros(T) tdic = [None for i in range(m * T)] #tdic = [tdic.copy() for i in range(T)] def tree2dic(tree): return {