Beispiel #1
0
def genTree(node, sList=None):
    """Build a tree rooted at ``node`` and apply the splits in ``sList`` in order.

    Parameters
    ----------
    node : pandas.DataFrame or Node
        A DataFrame becomes the data of a new root node with identifier 0.
        A Node is deep-copied, its ``fpointer``/``bpointer`` are cleared and
        the copy is added as the root under its own identifier.
    sList : iterable of (pid, x, s), optional
        One entry per split: ``pid`` is the identifier of the node to split,
        ``x`` the positional column index of the split variable and ``s`` the
        split value.  For dtype kinds ``'O'``/``'u'`` ``s`` must be a proper
        subset of the values observed at the node, and rows whose value is in
        ``s`` go to the first child.  For kinds ``'i'``/``'f'`` rows with
        ``value <= s`` go to the first child.  Defaults to no splits.

    Returns
    -------
    Tree
        The resulting tree.  Children of node ``pid`` get the identifiers
        ``2 * pid + 1`` and ``2 * pid + 2``.  ``tree.w2`` lists the split
        nodes none of whose children were split by a later entry.

    Raises
    ------
    TypeError
        If ``node`` is neither a DataFrame nor a Node, or the split column has
        a dtype kind other than ``'O'``, ``'u'``, ``'i'`` or ``'f'``.
    IndexError
        If a categorical split is not a proper subset of the observed values,
        or a child would hold fewer than ``minObs`` rows.  Callers use this to
        detect an infeasible proposal.
    """
    if sList is None:
        sList = []
    tree = Tree()
    if isinstance(node, pd.DataFrame):
        root = tree.create_node(tag='0', identifier=0, data=node)
        root.xvar = get_xvar(node)
    elif isinstance(node, Node):
        # Work on a detached copy so the caller's node and tree stay untouched.
        root = deepcopy(node)
        root.fpointer = []
        root.bpointer = []
        tree.add_node(root)
    else:
        raise TypeError('node must be a pandas DataFrame or a Node, got ' +
                        type(node).__name__)
    w2 = []
    for pid, x, s in sList:
        pNode = tree[pid]
        df = pNode.data
        dtype = DataTypes[x]
        column = df.iloc[:, x]
        xVal = set(column)
        # NOTE(review): NumPy kind 'u' is unsigned integer (unicode strings
        # are 'U'); such columns are treated as categorical here - confirm
        # that this is intended.
        if dtype in ('O', 'u'):
            if not (s < xVal):
                raise IndexError('invalid category split')
            S = powerset(xVal)
            idx = column.isin(s)
        elif dtype in ('i', 'f'):
            S = sorted(xVal)[:-1]  # leave the last element out
            idx = column <= s
        else:
            # Without this branch S/idx would be unbound on the first split,
            # or silently reuse the previous split's mask on later ones.
            raise TypeError('unsupported dtype kind ' + repr(dtype) +
                            ' for column ' + str(x))
        childDF = [df.loc[idx], df.loc[~idx]]
        cid = array([pid, pid]) * 2 + array([1, 2])
        for child_id, child_df in zip(cid, childDF):
            if len(child_df) < minObs:
                raise IndexError('no data in leaf')
            child = tree.create_node(str(child_id),
                                     child_id,
                                     parent=pid,
                                     data=child_df)
            child.var = None
            child.xvar = get_xvar(child_df)
        # Record the split rule on the parent now that both children exist.
        pNode.tag = str(pid) + '-' + str(x) + '-' + str(s)
        pNode.S = S
        pNode.var = x
        pNode.split = s
        # The parent of pid now has a split child, so it leaves w2.
        gpid = pNode.bpointer
        if gpid in w2:
            w2.remove(gpid)
        w2.append(pid)
    tree.w2 = w2
    return tree
Beispiel #2
0
def swap(tree):
    """Propose swapping the split rule of a random internal node with its parent's.

    A non-root node that carries a split (``var`` is not None) is drawn at
    random.  On a deep copy of ``tree`` the subtree rooted at that node's
    parent is removed, the (var, split) pairs of parent and child are
    exchanged in the tag list returned by ``recurTag``, and the subtree is
    rebuilt with ``genTree``.  The proposal is accepted when a uniform draw on
    [0, 1) falls below ``r = rLike * rStruct``.

    Parameters
    ----------
    tree : Tree
        Current tree; it must carry the attributes ``R`` and ``w2``.

    Returns
    -------
    Tree
        The modified copy if the proposal is accepted, otherwise ``tree``
        itself.  ``tree`` is also returned when fewer than two nodes are split
        or when ``genTree`` raises ``IndexError`` for the swapped rules.
    """
    internalNodes = [n for n in tree.all_nodes_itr() if n.var is not None]
    # With no split node, or only the root split, there is no parent/child
    # pair to swap.  The empty case used to crash in remove() below.
    if len(internalNodes) <= 1:
        return tree
    internalNodes.remove(tree[0])
    cNode = random.choice(internalNodes)
    tagc = (cNode.identifier, cNode.var, cNode.split)
    pid = cNode.bpointer
    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    tags = recurTag(sub, pid)
    # tags[0] is used as the parent's tag; exchange (var, split) between the
    # parent and the chosen child while keeping each node's identifier.
    tagp = tags[0]
    tags[tags.index(tagc)] = (tagc[0], tagp[1], tagp[2])
    tags[0] = (tagp[0], tagc[1], tagc[2])
    string = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        # genTree signals an infeasible set of rules with IndexError.
        print(string + 'unswappable')
        return tree
    #rTransit = 1
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rStruct
    print(string + f'{r.round(4)}')
    if random.uniform(0, 1) < r:
        if pid > 0:
            # Re-attach the rebuilt subtree under the grandparent and restore
            # the sorted order of its child pointers.
            gpid = tree[pid].bpointer
            tree1.paste(gpid, sub1)
            tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
        else:
            # The parent was the root, so the rebuilt subtree is the whole tree.
            tree1 = sub1
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
Beispiel #3
0
def change(tree):
    """Propose a new split rule for one internal node and accept it at random.

    A node is drawn among those returned by ``nidValid`` that offer more than
    one candidate split, then a variable and a split value are drawn for it.
    The subtree below the node is rebuilt with ``genTree`` on a deep copy of
    ``tree``.  The proposal is accepted when a uniform draw on [0, 1) falls
    below ``r = rLike * rTransit * rStruct``.

    Parameters
    ----------
    tree : Tree
        Current tree; it must carry the attributes ``R`` and ``w2``.

    Returns
    -------
    Tree
        The modified copy if the proposal is accepted, otherwise ``tree``
        itself (also when ``genTree`` raises ``IndexError`` for the new rule).
    """
    # Candidate node ids and, per node, the options returned by getChoice:
    # one collection of candidate split values per variable.
    nidInternal = nidValid(tree)
    choices = [getChoice(tree, n) for n in nidInternal]
    # Total number of candidate split values at each node (lazy; consumed once
    # by the zip below).
    n_choices = map(lambda L: sum([len(i) for i in L]), choices)
    # Keep only nodes with more than one candidate split.
    choiceDic = {
        a: b
        for (a, b, c) in zip(nidInternal, choices, n_choices) if c > 1
    }
    choices1 = list(choiceDic.keys())
    # NOTE(review): random.choice fails if no node has more than one candidate.
    nid = random.choice(choices1)
    p = tree[nid].data.shape[1]
    x0 = tree[nid].var
    s0 = tree[nid].split
    choices = choiceDic[nid]  # choose nid to split
    # The option list is indexed by column index minus one.
    # Presumably column 0 is the response - TODO confirm.
    # This remove() mutates the list stored in choiceDic.
    if s0 in choices[x0 - 1]:
        choices[x0 - 1].remove(s0)  # remove original split option
    choices2 = [i for i in range(p - 1)
                if len(choices[i]) > 0]  # choose var to split
    x = random.choice(choices2)
    choices3 = choices[x]  # choose value to split
    x += 1  # back from option-list index to column index
    s = random.choice(choices3)
    # Rebuild the subtree rooted at nid on a deep copy, with the new rule
    # replacing the first tag.
    tree1 = Tree(tree, deep=True)
    pid = tree1[nid].bpointer
    sub = tree1.remove_subtree(nid)
    tags = recurTag(sub, nid)
    tags[0] = (nid, x, s)
    try:
        sub1 = genTree(sub[nid], tags)
    except IndexError:
        # genTree signals an infeasible set of rules with IndexError.
        print(f'{mi} change {t}: {tags[0]}; unchangable')
        return tree
    if pid is not None:
        # Re-attach under the parent and restore sorted child pointers.
        tree1.paste(pid, sub1)
        tree1[pid].fpointer = sorted(tree1[pid].fpointer)
    else:
        # nid was the root, so the rebuilt subtree is the whole tree.
        tree1 = sub1
    # Count the nodes selectable in the proposed tree.  Nodes selectable
    # before and still valid are kept without re-checking; newly valid nodes
    # count only if they offer more than one candidate split.
    nidInternal1 = set(nidValid(tree1))
    choices1 = set(choices1)
    choices11 = nidInternal1.intersection(choices1)
    extra = nidInternal1 - choices1
    n_choices = map(lambda L: sum([len(i) for i in L]),
                    [getChoice(tree1, n) for n in extra])
    choices11 = list(choices11) + [
        a for (a, b) in zip(extra, n_choices) if b > 1
    ]
    # Candidate values for the original variable x0 at nid in the proposed
    # tree; the original value is discounted when the variable is unchanged.
    choices31 = getChoice(tree1, nid, x0)[x0 - 1]
    n31 = len(choices31)
    if (sub1[nid].var == sub[nid].var) and (s0 in choices31):
        n31 -= 1
    # NOTE(review): this divides by zero if choices11 is empty or n31 is 0.
    rTransit = len(choices1) * len(choices3) / (len(choices11) * n31)
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rTransit * rStruct
    print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}')
    if random.uniform(0, 1) < r:
        # Accepted: carry over w2 and R, then recompute the leaf list as the
        # leaves whose xvar is non-empty.
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
Beispiel #4
0
from scipy import stats
# Module-level setup.  It reads lamda, ig1, ig2, var, y, m, k, df0 and n0,
# none of which are defined in the visible lines.

# Gamma CDF at lamda with shape ig1 and scale 1 / ig2.
# NOTE(review): `gamma` is not imported in the visible lines - presumably
# scipy.stats.gamma; confirm.
gg = gamma.cdf(lamda, ig1, scale=1 / ig2)
# Inverse-gamma CDF at var with shape ig1 and scale ig2.
g = stats.invgamma.cdf(var, ig1, scale=ig2)
# Midrange of y divided by m.
mumu = (y.min() + y.max()) * 0.5 / m
# m * mumu is the midrange of y, so the numerator is half the range of y.
sigma_mu = (y.max() - m * mumu) / (k * sqrt(m))
var_mu = sigma_mu**2
tau = 1 / var_mu  # reciprocal of var_mu
taumu = tau * mumu

# One dtype kind character per column of df0.
DataTypes = df0.dtypes.map(lambda x: x.kind)
#%%
# Initial tree: a single root (identifier 0) holding all of df0, with no split.
tree = Tree()
root = tree.create_node('0', 0, data=df0)
root.xvar = get_xvar(df0)
root.var = None
tree.w2 = []
tree.leaf = [0]
# Cumulative sums of the weights 2.5, 2.5 and 4 divided by 9, i.e. [2.5/9, 5/9, 1].
# Presumably cumulative probabilities for picking a move type - TODO confirm.
ProbDefault = array([2.5, 2.5, 4]).cumsum() / 9
## tree = trueTree
# Presumably the number of iterations and the burn-in length - TODO confirm.
T = 1250
burn = 250
# m independent deep copies of the initial tree.
trees = [deepcopy(tree) for i in range(m)]
# One column per tree, one row per row of df0, every cell set to mumu.
MM = pd.DataFrame(index=df0.index, columns=range(m), data=mumu)
# Pre-allocated result arrays: n0 x T and length T.
Yhat = zeros((n0, T))
Depth_mu = zeros(T)
# Flat list with one None placeholder per (tree, iteration) pair.
tdic = [None for i in range(m * T)]
#tdic = [tdic.copy() for i in range(T)]


def tree2dic(tree):
    return {