def generateSplitNode(self, datas, prenode, depth, maxD):
    """Recursively build the child nodes for ``datas``, splitting 'F' nodes.

    Records are grouped by the value of ``nextLoc()``.  A group becomes a
    node keyed ``(start, key)`` when its share of ``datas`` is at least
    ``self.R`` and its size is at least ``self.C``; all other records are
    pooled and handled as an 'LV' node at the end.

    NOTE(review): the line structure of this method had been lost in the
    source.  The ``else`` that pools records is read here as belonging to
    the support test (as in ``generate_node``), which means a supported
    group whose key equals the start value is neither emitted nor pooled.
    Confirm against version control.

    Args:
        datas: Sequence of records exposing ``now()``, ``nextLoc()`` and
            ``updateLo()``.
        prenode: Node the returned list will be attached to.  Only its
            ``word_type`` is read here; it is passed through unchanged when
            the pooled records are re-processed under an 'LV' parent.
        depth: Current recursion depth.
        maxD: Depth at which recursion stops and a single 'Continue' node
            holding all of ``datas`` is returned.

    Returns:
        A list of nodes (empty when ``datas`` is empty).
    """
    # Guard: the original indexed datas[0] unconditionally and raised
    # IndexError for an empty input (e.g. an empty dataset or an empty
    # ``childnode.ids``).
    if not datas:
        return []

    t_start = datas[0].now()

    if depth >= maxD:
        tNode = node((t_start, -1), datas)
        tNode.word_type = 'Continue'
        return [tNode]

    # Group the records by their next location, keeping first-seen order.
    t_num = {}
    for data in datas:
        t_num.setdefault(data.nextLoc(), []).append(data)

    t_r = []
    t_v = []  # records from groups that failed the support test
    total = float(len(datas))
    for key, group in t_num.items():
        if float(len(group)) / total >= self.R and len(group) >= self.C:
            t_node = node((t_start, key), group)
            t_node.getNodeType()
            t_node.upDataByType()
            if t_start != key:
                if t_node.word_type != 'F':
                    # Rebind (``a = a + b``) exactly as the original did
                    # instead of extending in place; whether ``node`` shares
                    # a default ``children`` list is not visible from here.
                    t_node.children = t_node.children + self.generateSplitNode(
                        group, t_node, depth + 1, maxD)
                    t_r.append(t_node)
                else:
                    # 'F' nodes are replaced by the nodes splitNode() yields,
                    # each of which recurses over its own ``ids``.
                    for childnode in t_node.splitNode():
                        childnode.children = (
                            childnode.children + self.generateSplitNode(
                                childnode.ids, childnode, depth + 1, maxD))
                        t_r.append(childnode)
                    # t_node.children = t_node.children + self.generate_node(t_num[key])
        else:
            t_v.extend(group)

    if t_v:
        advanced = False
        if len(t_v) >= self.C:
            maxData = max(m.now() for m in t_v)
            # Diagnostic output retained from the original implementation.
            print(maxData, t_start)
            advanced = maxData > t_start

        if not advanced:
            # Too few pooled records, or none of them is past the start
            # value: emit a terminal 'LV' node.
            t_node = node((t_start, -1), t_v)
            t_node.word_type = 'LV'
            t_r.append(t_node)
        else:
            for pridata in t_v:
                pridata.updateLo()
            if prenode.word_type != 'LV':
                t_node = node((t_start, maxData), t_v)
                t_node.word_type = 'LV'
                # depth and maxD are both incremented here, so this 'LV'
                # level does not reduce the remaining depth budget.
                t_node.children = t_node.children + self.generateSplitNode(
                    t_v, t_node, depth + 1, maxD + 1)
                t_r.append(t_node)
            else:
                # Parent is already 'LV': splice the results in at this
                # level rather than nesting another 'LV' node.
                t_r.extend(
                    self.generateSplitNode(t_v, prenode, depth + 1, maxD))

    return t_r
def generate_node(self, n_data):
    """Recursively build typed child nodes for ``n_data``.

    Records are grouped by ``nextLoc()``.  A group becomes a node keyed
    ``(start, key)`` when its share of ``n_data`` is at least ``self.R`` and
    its size is at least ``self.C``; ``getNodeType()`` and
    ``upDataByType()`` are called on each such node.  Records from all other
    groups are pooled, ``updateLo()`` is called on each of them, and they
    are emitted together as one ``(start, -1)`` node.

    NOTE(review): the line structure of this method had been lost in the
    source; the node is assumed to be appended whether or not it recurses,
    and the pooling ``else`` is assumed to pair with the support test.
    NOTE(review): another ``generate_node`` is defined later in this file;
    if both belong to the same class, the later definition replaces this
    one.

    Args:
        n_data: Sequence of records exposing ``now()``, ``nextLoc()`` and
            ``updateLo()``.

    Returns:
        A list of nodes (empty when ``n_data`` is empty).
    """
    # Guard: the original raised IndexError on n_data[0] for empty input.
    if not n_data:
        return []

    t_start = n_data[0].now()

    # Group the records by their next location, keeping first-seen order.
    t_num = {}
    for data in n_data:
        t_num.setdefault(data.nextLoc(), []).append(data)

    t_r = []
    t_v = []  # records from groups that failed the support test
    total = float(len(n_data))
    for key, group in t_num.items():
        if float(len(group)) / total >= self.R and len(group) >= self.C:
            t_node = node((t_start, key), group)
            t_node.getNodeType()
            t_node.upDataByType()
            if t_start != key:
                # Rebind rather than extend in place, as the original did.
                t_node.children = t_node.children + self.generate_node(group)
            t_r.append(t_node)
        else:
            # Fix: the original used ``append`` here, which produced a list
            # of lists and made the ``updateLo()`` loop below fail with
            # AttributeError.  ``extend`` keeps t_v a flat list of records.
            t_v.extend(group)

    if t_v:
        for pridata in t_v:
            pridata.updateLo()
        t_r.append(node((t_start, -1), t_v))

    return t_r
def generateSplitNT(self, h=10):
    """Build the split tree over ``self.datas`` and return its root.

    A fresh root node with ``word_type`` 'root' is stored in ``self.tree``;
    its children are produced by ``generateSplitNode`` starting at depth 0.

    Args:
        h: Maximum depth passed to ``generateSplitNode`` as ``maxD``.

    Returns:
        The root node (also available as ``self.tree``).
    """
    # The unused local ``t_ids`` from the original has been dropped.
    self.tree = node()
    self.tree.word_type = 'root'
    self.tree.children = self.generateSplitNode(self.datas, self.tree, 0, h)
    return self.tree
def generate_T(self):
    """Build the tree over a shallow copy of ``self.datas`` and return it.

    A fresh root node is stored in ``self.tree`` and its children are
    produced by ``generate_node``.

    NOTE(review): another ``generate_T`` is defined later in this file; if
    both belong to the same class, the later definition replaces this one.

    Returns:
        The root node (also available as ``self.tree``).
    """
    # list() replaces the manual copy loop, whose loop variable shadowed the
    # builtin ``id``.  The copy is kept so generate_node receives its own
    # list object, as before.
    t_ids = list(self.datas)
    self.tree = node()
    self.tree.children = self.generate_node(t_ids)
    return self.tree
def generate_node(self, datas):
    """Recursively build child nodes for ``datas``.

    Records are grouped by ``nextLoc()``.  A group becomes a node keyed
    ``(start, key)`` when its share of ``datas`` is at least ``self.R`` and
    its size is at least ``self.C``; such a node recurses into its own group
    unless its key equals the start value.  Records from all other groups
    are emitted together as one ``(start, -1)`` node.

    NOTE(review): the line structure of this method had been lost in the
    source; the node is assumed to be appended whether or not it recurses,
    and the pooling ``else`` is assumed to pair with the support test.
    NOTE(review): another ``generate_node`` is defined earlier in this file;
    if both belong to the same class, this later definition is the one in
    effect.

    Args:
        datas: Sequence of records exposing ``now()`` and ``nextLoc()``.

    Returns:
        A list of nodes (empty when ``datas`` is empty).
    """
    # Guard: the original raised IndexError on datas[0] for empty input.
    if not datas:
        return []

    t_start = datas[0].now()

    # Group the records by their next location, keeping first-seen order.
    t_num = {}
    for data in datas:
        t_num.setdefault(data.nextLoc(), []).append(data)

    t_r = []
    t_v = []  # records from groups that failed the support test
    total = float(len(datas))
    for key, group in t_num.items():
        if float(len(group)) / total >= self.R and len(group) >= self.C:
            t_node = node((t_start, key), group)
            if t_start != key:
                # Rebind rather than extend in place, as the original did.
                t_node.children = t_node.children + self.generate_node(group)
            t_r.append(t_node)
        else:
            t_v.extend(group)

    if t_v:
        t_r.append(node((t_start, -1), t_v))

    return t_r
def generate_T(self):
    """Build the tree directly over ``self.datas`` and return its root.

    A fresh root node is stored in ``self.tree`` and its children are
    produced by ``generate_node``.

    NOTE(review): another ``generate_T`` is defined earlier in this file; if
    both belong to the same class, this later definition is the one in
    effect.

    Returns:
        The root node (also available as ``self.tree``).
    """
    # The unused local ``t_ids`` from the original has been dropped.
    self.tree = node()
    self.tree.children = self.generate_node(self.datas)
    return self.tree