def expandWithoutRatio(hlca, nclass=2, n_values=2, min_split=3, max_level=2):
    """Recursively attach child HLCA nodes below *hlca*, ignoring class ratios.

    For every class index of ``hlca`` whose ``subdata`` slice holds more than
    ``min_split`` rows, a fresh ``LCA`` is fitted on that slice.  The fit is
    kept only when its ``bic`` is lower than the parent's ``bic`` by more than
    one; in that case a child ``HLCA`` is created and expanded in turn.

    Args:
        hlca: node to expand; must expose ``nclass``, ``subdata``, ``lca.bic``
            and ``classThreshold``.
        nclass: forwarded to ``LCA`` as ``max_nclass``.
        n_values: forwarded to ``LCA`` as ``n_values``.
        min_split: a slice is only considered when it has strictly more rows
            than this.
        max_level: remaining recursion depth; ``0`` stops immediately.

    Returns:
        None.  The only visible effect is the construction of child ``HLCA``
        objects (presumably linked to ``hlca`` through the constructor's last
        argument -- confirm against the ``HLCA`` class).
    """
    if max_level == 0:
        return

    deeper = max_level - 1
    for cls in range(hlca.nclass):
        subset = hlca.subdata[cls]
        if len(subset) <= min_split:
            # Too few rows to be worth splitting further.
            continue

        model = LCA(max_nclass=nclass, n_values=n_values)
        model.fit(subset)
        if model.bic >= (hlca.lca.bic - 1):
            # The child fit does not beat the parent's bic by more than 1.
            continue

        # No labels are passed for the child (third argument is None).
        child = HLCA(model, subset, None, cls, hlca.classThreshold, hlca)
        expandWithoutRatio(child, nclass, n_values, min_split, deeper)
def testLCA(self):
    """Check ``LCA.findLCA`` on the fixture tree ``T`` for several query pairs.

    Each case is ``(first, second, expected)``: the node returned for the
    query ``(first, second)`` must carry ``expected`` in its ``data``
    attribute.  Cases run in order and the first mismatch fails the test.
    """
    cases = (
        (1, 2.5, 2),
        (3, 3, 3),
        (8, 2.5, 3),
        (4, 3, 3),
        (10, 2, 2),
        (2, 1, 2),
        (8, 0.2, 3),
    )
    for first, second, expected in cases:
        self.assertEqual(LCA.findLCA(T, first, second).data, expected)
def expand(hlca, nclass=2, n_values=2, min_ratio=0.75, min_split=3,
           max_level=2):
    """Recursively attach child HLCA nodes below *hlca* for mixed classes.

    A class index is expanded only when its entry in ``hlca.ratio`` lies
    strictly between ``1 - min_ratio`` and ``min_ratio`` and its ``subdata``
    slice has more than ``min_split`` rows.  A fresh ``LCA`` is then fitted on
    the slice and kept only when its ``bic`` is lower than the parent's
    ``bic`` by more than one; a child ``HLCA`` is built from it and expanded
    in turn.

    Args:
        hlca: node to expand; must expose ``nclass``, ``ratio``, ``subdata``,
            ``subY``, ``lca.bic`` and ``classThreshold``.
        nclass: forwarded to ``LCA`` as ``max_nclass``.
        n_values: forwarded to ``LCA`` as ``n_values``.
        min_ratio: upper bound (exclusive) on the class ratio; the lower
            bound (exclusive) is ``1 - min_ratio``.
        min_split: a slice is only considered when it has strictly more rows
            than this.
        max_level: remaining recursion depth; ``0`` stops immediately.

    Returns:
        None.
    """
    if max_level == 0:
        return

    ratio = hlca.ratio
    subdata = hlca.subdata
    deeper = max_level - 1

    for cls in range(hlca.nclass):
        share = ratio[cls]
        undecided = share < min_ratio and share > (1 - min_ratio)
        if not undecided:
            # The ratio is already outside the (1 - min_ratio, min_ratio) band.
            continue

        subset = subdata[cls]
        if len(subset) <= min_split:
            continue

        model = LCA(max_nclass=nclass, n_values=n_values)
        model.fit(subset)
        if model.bic >= (hlca.lca.bic - 1):
            # The child fit does not beat the parent's bic by more than 1.
            continue

        child = HLCA(model, subset, hlca.subY[cls], cls, hlca.classThreshold,
                     hlca)
        expand(child, nclass, n_values, min_ratio, min_split, deeper)
class RMQ:
    """Range query helper built on a ``CartesianTree`` and an ``LCA`` index.

    The constructor feeds ``data`` to a ``CartesianTree`` and keeps what
    ``tree.update`` returns in ``self.indices`` (presumably one tree node per
    input position -- confirm against ``CartesianTree``).  A query
    ``rmq[i:j]`` returns ``self.lca.get_lca(self.indices[i], self.indices[j])``.

    Note that, unlike ordinary Python slicing, the ``stop`` bound is used as
    an index itself (inclusive): a missing ``stop`` defaults to
    ``len(self) - 1``.
    """

    def __init__(self, data):
        """Build the tree and the LCA index for *data*.

        Raises:
            ValueError: if ``data`` is falsy (e.g. an empty sequence).
        """
        if not data:
            raise ValueError('data must not be empty')
        tree = CartesianTree()
        self.indices = tree.update(data)
        self.lca = LCA(tree.root)

    def __getitem__(self, indices):
        """Answer the query for the inclusive range ``indices.start..stop``.

        Args:
            indices: a ``slice`` without a step.  ``start`` defaults to ``0``
                and ``stop`` to ``len(self) - 1``.

        Returns:
            Whatever ``self.lca.get_lca`` returns for the two boundary
            entries of ``self.indices``.

        Raises:
            TypeError: if ``indices`` is not a slice.
            NotImplementedError: if the slice has a step.
            IndexError: if either bound is outside ``[0, len(self))``.
        """
        if not isinstance(indices, slice):
            # Previously this fell through to an UnboundLocalError on `i`.
            raise TypeError(
                'RMQ indices must be slices, not {}'.format(
                    type(indices).__name__))
        if indices.step is not None:
            raise NotImplementedError('rmq supports only continues sequences')

        size = len(self)
        i = 0 if indices.start is None else indices.start
        j = size - 1 if indices.stop is None else indices.stop

        # Both bounds must be valid positions.  The former check
        # `not (0 <= i, j < len(self))` tested a 2-tuple, which is always
        # truthy, so it never rejected anything and negative bounds wrapped.
        if not (0 <= i < size and 0 <= j < size):
            raise IndexError('index out of range')
        return self.lca.get_lca(self.indices[i], self.indices[j])

    def __len__(self):
        """Return the number of entries in ``self.indices``."""
        return len(self.indices)
from lca import LCA
from hierachicalLCA import HLCA, expand
from anytree import RenderTree

if __name__ == '__main__':
    import pandas as pd
    import numpy as np

    # Load the sample data, round it to integers and shift every value by 5.
    mog = pd.read_csv('mixture-of-gaussian.csv', header=None)
    mog = np.round(mog).astype(int) + 5

    # Fit the root model on the raw array and dump its fitted attributes.
    lca = LCA(max_nclass=2, n_values=24)
    lca.fit(mog.values)
    for caption, value in (("lca.qh =\n{}", lca.qh),
                           ("lca.logp =\n{}", lca.logp)):
        print(caption.format(value))

    # Alternating 0/1 labels, one per row, shaped as a single column.
    Y = np.array([row % 2 for row in range(len(mog))]).reshape(-1, 1)

    # Build the hierarchy and print one line per node of the rendered tree.
    root = HLCA(lca, mog, Y, 0)
    expand(root, 2, 24)
    for pre, _, node in RenderTree(root):
        summary = "{} {} {}".format(node.upperClass, len(node.data),
                                    node.ratio)
        print(pre, summary, sep='')
labels=range(6))  # tail of a call that begins above this excerpt
# Binary flag: 1 when CabinNum is positive, otherwise 0.
tt['hascabin'] = tt.CabinNum.apply(lambda c: 1 if c > 0 else 0)
# Feature columns (`keeps` is defined outside this excerpt) and the target.
data = tt[keeps]
Y = tt["Survived"]
# print(np.max(data, axis=0))
# Alternative N_VALUES settings kept from earlier experiments:
# N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 9, 4, 3, 5, 3, 3, 6]  # for female
# N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 4, 3, 5, 3, 3, 6]  # for male
# Passed to LCA as `n_values`; presumably one entry per column of `keeps`
# (23 entries here) -- TODO confirm against the `keeps` definition.
N_VALUES = [
    7, 2, 6, 4, 3, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 9, 4, 3, 5, 3, 3, 6
]
# Fit the root model; `root_nclass` comes from outside this excerpt.
lca = LCA(max_nclass=root_nclass, n_values=N_VALUES)
lca.fit(data)
# NOTE(review): 0.35 is the fifth positional argument of HLCA, presumably the
# class threshold -- confirm against the HLCA constructor.
root = HLCA(lca, data, Y, 0, 0.35)
# NOTE(review): 0.89 is the fourth positional argument of expand, presumably
# `min_ratio` -- confirm against the imported expand's signature.
expand(root, node_nclass, N_VALUES, 0.89, min_split, max_level=max_level)
# One line per tree node: upper class, row count, ratio, and sub-slice sizes.
for pre, _, node in RenderTree(root):
    print(pre, end='')
    print("{} {} {} {}".format(node.upperClass, len(node.data), node.ratio,
                               [len(sd) for sd in node.subdata]))
# Predict on the same data the model was fitted on, then round the output.
opred = predict(data, root)
pred = np.round(opred)
# Fraction of rows whose rounded prediction equals the Survived column.
print(sum(pred == tt.Survived.values) / float(len(data)))
# NOTE(review): this label is printed after the accuracy value above; it may
# instead belong to output produced below this excerpt -- confirm.
print("train accurate: ")
# Columns used as model features (21 names).
keeps = [
    "Age", "Embarked", "Entourage", "Fare", "Parch", "Pclass", "Sex",
    "SibSp", "FamilySize",
    "ischild", "isfather", "ismother", "ishusband", "iswife", "isentourage",
    "aliveChildren", "aliveFather", "aliveMother", "aliveHusband",
    "aliveWife", "aliveEntourage",
]

# Raw feature arrays for the full frame and for the two sub-frames
# (tt, tt0 and tt1 are defined outside this excerpt), plus the target.
data, data0, data1 = (frame[keeps].values for frame in (tt, tt0, tt1))
Y = tt["Survived"].values

# Passed to LCA as `n_values`; 21 entries, matching the 21 names in `keeps`.
N_VALUES = [7, 4, 8, 6, 7, 4, 3, 9, 12, 2, 2, 2, 2, 2, 2, 4, 3, 5, 3, 3, 5]

# Hierarchy for the first sub-frame: root model with up to 9 classes.
lca = LCA(max_nclass=9, n_values=N_VALUES)
lca.fit(data0)
root0 = HLCA(lca, data0, None, 0, 0.35)
expandWithoutRatio(root0, 5, N_VALUES, 9, max_level=3)

# Hierarchy for the second sub-frame: root model with up to 2 classes.
lca = LCA(max_nclass=2, n_values=N_VALUES)
lca.fit(data1)
root1 = HLCA(lca, data1, None, 0, 0.35)
expandWithoutRatio(root1, 5, N_VALUES, 9, max_level=3)

# One line per node of the first hierarchy: upper class, row count and the
# size of each sub-slice.
for pre, _, node in RenderTree(root0):
    line = "{} {} {}".format(node.upperClass, len(node.data),
                             [len(sd) for sd in node.subdata])
    print(pre, line, sep='')
def testBroken(self):
    """Check ``LCA.findLCA`` when the tree or one or both values are None.

    With only the second value missing, the returned node must carry ``1`` in
    ``data``; with the tree missing, or both values missing, the result must
    be ``None``.
    """
    partial = LCA.findLCA(T, 1, None)
    self.assertEqual(partial.data, 1)

    none_cases = (
        (None, 1, 2.5),
        (T, None, None),
        (None, None, None),
    )
    for tree, first, second in none_cases:
        self.assertIsNone(LCA.findLCA(tree, first, second))
def __init__(self, data):
    """Build a Cartesian tree over *data* and an LCA index on its root.

    Stores the value returned by ``CartesianTree.update`` in ``self.indices``
    and an ``LCA`` built from the tree's root in ``self.lca``.

    Raises:
        ValueError: if ``data`` is falsy (e.g. an empty sequence).
    """
    if not data:
        raise ValueError('data must not be empty')

    cartesian = CartesianTree()
    positions = cartesian.update(data)
    self.indices = positions
    self.lca = LCA(cartesian.root)