コード例 #1
0
def expandWithoutRatio(hlca, nclass=2, n_values=2, min_split=3, max_level=2):
    """Recursively grow child HLCA nodes under ``hlca`` without a ratio test.

    For every class index of ``hlca`` whose subset holds more than
    ``min_split`` rows, a fresh ``LCA`` is fitted on that subset.  The subset
    is skipped when the new model's BIC is greater than or equal to the
    parent's BIC minus 1; otherwise a child ``HLCA`` is constructed (with
    ``hlca`` passed as its last argument) and expanded in turn.

    Args:
        hlca: Node to expand.  Must expose ``nclass``, ``subdata``,
            ``lca.bic`` and ``classThreshold``.
        nclass: Forwarded to ``LCA`` as ``max_nclass``.
        n_values: Forwarded to ``LCA`` as ``n_values``.
        min_split: A subset is only considered when its length exceeds this.
        max_level: Remaining recursion depth; ``0`` stops the expansion.

    Returns:
        None.
    """
    if max_level == 0:
        return

    for class_idx in range(hlca.nclass):
        subset = hlca.subdata[class_idx]

        # Too few rows to be worth fitting another model.
        if not len(subset) > min_split:
            continue

        child_model = LCA(max_nclass=nclass, n_values=n_values)
        child_model.fit(subset)

        # Keep the split only if it beats the parent's BIC by more than 1.
        if child_model.bic >= (hlca.lca.bic - 1):
            continue

        child_node = HLCA(child_model, subset, None, class_idx,
                          hlca.classThreshold, hlca)
        expandWithoutRatio(child_node, nclass, n_values, min_split,
                           max_level - 1)
コード例 #2
0
 def testLCA(self):
     """Check ``LCA.findLCA`` on the tree ``T`` for several key pairs."""
     # (first key, second key, expected ``.data`` of the returned node)
     cases = (
         (1, 2.5, 2),
         (3, 3, 3),
         (8, 2.5, 3),
         (4, 3, 3),
         (10, 2, 2),
         (2, 1, 2),
         (8, 0.2, 3),
     )
     for first, second, expected in cases:
         self.assertEqual(LCA.findLCA(T, first, second).data, expected)
コード例 #3
0
def expand(hlca, nclass=2, n_values=2, min_ratio=0.75, min_split=3,
           max_level=2):
    """Recursively grow child HLCA nodes for classes with a mixed ratio.

    A class index is split only when its ratio lies strictly between
    ``1 - min_ratio`` and ``min_ratio`` and its subset holds more than
    ``min_split`` rows.  A fresh ``LCA`` is then fitted on the subset; the
    split is dropped when the new model's BIC is greater than or equal to the
    parent's BIC minus 1.  Otherwise a child ``HLCA`` is constructed (with
    ``hlca`` passed as its last argument) and expanded in turn.

    Args:
        hlca: Node to expand.  Must expose ``nclass``, ``ratio``, ``subdata``,
            ``subY``, ``lca.bic`` and ``classThreshold``.
        nclass: Forwarded to ``LCA`` as ``max_nclass``.
        n_values: Forwarded to ``LCA`` as ``n_values``.
        min_ratio: Upper bound of the ratio window; the lower bound is
            ``1 - min_ratio``.
        min_split: A subset is only considered when its length exceeds this.
        max_level: Remaining recursion depth; ``0`` stops the expansion.

    Returns:
        None.
    """
    if max_level == 0:
        return

    class_ratios = hlca.ratio
    class_subsets = hlca.subdata

    for class_idx in range(hlca.nclass):
        share = class_ratios[class_idx]

        # Only classes whose ratio falls inside the open window are split.
        if not (share < min_ratio and share > (1 - min_ratio)):
            continue

        subset = class_subsets[class_idx]
        if not len(subset) > min_split:
            continue

        child_model = LCA(max_nclass=nclass, n_values=n_values)
        child_model.fit(subset)

        # Keep the split only if it beats the parent's BIC by more than 1.
        if child_model.bic >= (hlca.lca.bic - 1):
            continue

        child_node = HLCA(child_model, subset, hlca.subY[class_idx],
                          class_idx, hlca.classThreshold, hlca)
        expand(child_node, nclass, n_values, min_ratio, min_split,
               max_level - 1)
コード例 #4
0
class RMQ:
    """Answer slice queries via an LCA lookup over a Cartesian tree.

    The constructor feeds ``data`` into a ``CartesianTree`` and keeps the
    value returned by ``update`` in ``self.indices``; ``rmq[i:j]`` then asks
    the ``LCA`` helper for the common ancestor of ``indices[i]`` and
    ``indices[j]``.
    """

    def __init__(self, data):
        """Build the Cartesian tree for ``data`` and its LCA helper.

        Raises:
            ValueError: If ``data`` is falsy (e.g. an empty sequence).
        """
        if not data:
            raise ValueError('data must not be empty')
        tree = CartesianTree()
        self.indices = tree.update(data)
        self.lca = LCA(tree.root)

    def __getitem__(self, indices):
        """Return the LCA result for the two end positions of a slice.

        Both slice bounds are used as positions into ``self.indices``; a
        missing start defaults to ``0`` and a missing stop to
        ``len(self) - 1``.

        Raises:
            NotImplementedError: If the slice has a step.
            IndexError: If either bound lies outside ``[0, len(self))``.
        """
        if isinstance(indices, slice):
            if indices.step is not None:
                raise NotImplementedError('rmq supports only continues sequences')
            size = len(self)
            i = 0 if indices.start is None else indices.start
            j = size - 1 if indices.stop is None else indices.stop
            # Each bound is checked on its own: ``(0 <= i, j < size)`` would
            # be a two-element tuple, which is always truthy, so negating it
            # could never reject an index (negative bounds would silently
            # wrap around).
            if not (0 <= i < size and 0 <= j < size):
                raise IndexError('index out of range')
            return self.lca.get_lca(self.indices[i], self.indices[j])
        # NOTE(review): non-slice keys are not handled and yield None; kept
        # as-is for backward compatibility -- consider raising TypeError.
        return None

    def __len__(self):
        """Return the number of entries in ``self.indices``."""
        return len(self.indices)
コード例 #5
0
from lca import LCA
from hierachicalLCA import HLCA, expand
from anytree import RenderTree

if __name__ == '__main__':
    import pandas as pd
    import numpy as np

    # Load the samples, round them to whole numbers, cast to int and shift
    # every value up by 5.
    mog = np.round(
        pd.read_csv('mixture-of-gaussian.csv', header=None)
    ).astype(int) + 5

    # Fit the root model on the raw array.
    lca = LCA(max_nclass=2, n_values=24)
    lca.fit(mog.values)

    print("lca.qh =\n{}".format(lca.qh))
    print("lca.logp =\n{}".format(lca.logp))

    # Alternating 0/1 labels, one per row, as a column vector.
    Y = (np.arange(len(mog)) % 2).reshape(-1, 1)

    root = HLCA(lca, mog, Y, 0)
    expand(root, 2, 24)

    # Dump the resulting tree, one node per line.
    for prefix, _, node in RenderTree(root):
        print(prefix, end='')
        fields = (node.upperClass, len(node.data), node.ratio)
        print("{} {} {}".format(*fields))
コード例 #6
0
                        labels=range(6))
    # Binary flag column: 1 when the row's CabinNum is greater than 0, else 0.
    tt['hascabin'] = tt.CabinNum.apply(lambda c: 1 if c > 0 else 0)

    # Feature columns (selected by `keeps`, defined outside this view) and
    # the "Survived" column used as the target.
    data = tt[keeps]
    Y = tt["Survived"]

    # print(np.max(data, axis=0))

    # N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 9, 4, 3, 5, 3, 3, 6]
    # for female
    # N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 4, 3, 5, 3, 3, 6]
    # for male
    # NOTE(review): the active list has 23 entries; presumably one per column
    # in `keeps` (number of distinct values per feature) -- confirm against
    # the definition of `keeps`.
    N_VALUES = [
        7, 2, 6, 4, 3, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 9, 4, 3, 5, 3, 3, 6
    ]
    # Fit the root model on the selected features and wrap it as the root
    # node.  NOTE(review): 0.35 is HLCA's fifth positional argument,
    # presumably the class threshold -- confirm against HLCA's signature.
    lca = LCA(max_nclass=root_nclass, n_values=N_VALUES)
    lca.fit(data)
    root = HLCA(lca, data, Y, 0, 0.35)

    # Grow the hierarchy below the root.  NOTE(review): 0.89 is the fourth
    # positional argument, presumably expand's min_ratio -- confirm.
    expand(root, node_nclass, N_VALUES, 0.89, min_split, max_level=max_level)

    # Dump the tree: upper class, row count, ratio and per-class subset sizes.
    for pre, _, node in RenderTree(root):
        print(pre, end='')
        print("{} {} {} {}".format(node.upperClass, len(node.data), node.ratio,
                                   [len(sd) for sd in node.subdata]))

    # Predict on the same data the model was fitted on, round the outputs and
    # print the fraction that equals the Survived column.
    opred = predict(data, root)
    pred = np.round(opred)
    print(sum(pred == tt.Survived.values) / float(len(data)))

    print("train accurate: ")
コード例 #7
0
    # Names of the 21 feature columns fed to the models.
    keeps = [
        "Age", "Embarked", "Entourage", "Fare", "Parch", "Pclass", "Sex",
        "SibSp", "FamilySize", "ischild", "isfather", "ismother", "ishusband",
        "iswife", "isentourage", "aliveChildren", "aliveFather", "aliveMother",
        "aliveHusband", "aliveWife", "aliveEntourage"
    ]

    # Feature matrices as plain arrays.  `tt0` and `tt1` are defined outside
    # this view -- presumably two subsets of `tt`; confirm.
    data = tt[keeps].values
    data0 = tt0[keeps].values
    data1 = tt1[keeps].values
    # NOTE(review): Y is not used in the visible part of this block.
    Y = tt["Survived"].values

    # print(np.max(data, axis=0))

    # 21 entries, matching the 21 columns in `keeps`; presumably the number of
    # distinct values per feature -- confirm against LCA's n_values contract.
    N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 4, 3, 5, 3, 3, 5]
    # Root model for the first subset: up to 9 classes, no labels (None).
    lca = LCA(max_nclass=9, n_values=N_VALUES)
    lca.fit(data0)
    root0 = HLCA(lca, data0, None, 0, 0.35)

    # NOTE(review): positional arguments presumably map to nclass=5,
    # n_values=N_VALUES, min_split=9 -- confirm against expandWithoutRatio.
    expandWithoutRatio(root0, 5, N_VALUES, 9, max_level=3)

    # Root model for the second subset: up to 2 classes; `lca` is rebound.
    lca = LCA(max_nclass=2, n_values=N_VALUES)
    lca.fit(data1)
    root1 = HLCA(lca, data1, None, 0, 0.35)

    expandWithoutRatio(root1, 5, N_VALUES, 9, max_level=3)

    # Dump the first tree only: upper class, row count, per-class subset
    # sizes.  root1 is not rendered in the visible part of this block.
    for pre, _, node in RenderTree(root0):
        print(pre, end='')
        print("{} {} {}".format(node.upperClass, len(node.data),
                                [len(sd) for sd in node.subdata]))
コード例 #8
0
 def testBroken(self):
     """Check ``LCA.findLCA`` when the tree or keys are ``None``."""
     # A missing second key is expected to yield the node holding 1.
     found = LCA.findLCA(T, 1, None)
     self.assertEqual(found.data, 1)

     # A missing tree, or both keys missing, is expected to yield None.
     degenerate_calls = (
         (None, 1, 2.5),
         (T, None, None),
         (None, None, None),
     )
     for tree, first, second in degenerate_calls:
         self.assertIsNone(LCA.findLCA(tree, first, second))
コード例 #9
0
 def __init__(self, data):
     """Build the Cartesian tree for ``data`` and the LCA helper on its root.

     Raises:
         ValueError: If ``data`` is falsy (e.g. an empty sequence).
     """
     if not data:
         raise ValueError('data must not be empty')

     cartesian = CartesianTree()
     # update() runs first and its return value is kept as ``indices``;
     # the tree's root is read afterwards to seed the LCA helper.
     self.indices = cartesian.update(data)
     self.lca = LCA(cartesian.root)