class Xml2Obj:
    """Transform an XML file into ``Element`` objects mirrored in a ``Tree``.

    The three ``*Element``/``CharacterData`` methods are Expat callbacks;
    ``Parse`` wires them up and feeds the file to the parser.
    """

    def __init__(self):
        self.root = None        # first element seen (document root)
        self.nodeStack = []     # elements that are currently open
        self.numberCount = 0    # running number handed to each new element
        self.tree = Tree()

    def StartElement(self, name, attributes):
        """Expat start-element handler.

        Builds an ``Element`` for the tag, links it to the currently open
        element (if any) and pushes it on the stack of open elements.
        """
        if self.nodeStack:
            parent = self.nodeStack[-1]
            element = Element(name, attributes, self.tree, self.numberCount,
                              parent.number)
            parent.addChild(element)
            # The parent hands back the tree to keep using from here on.
            self.tree = parent.getTree()
        else:
            # First element of the document: it becomes the tree root.
            element = self.root = Element(name, attributes, self.tree,
                                          self.numberCount, None)
            self.tree.create_node(self.root.name, self.root.number)
        self.numberCount += 1
        self.nodeStack.append(element)

    def EndElement(self, name):
        """Expat end-element handler.

        Appends the text collected for the closing element to the tag of its
        tree node, then removes the element from the stack.
        """
        element = self.nodeStack[-1]
        if element.cdata:
            self.tree.get_node(element.number).tag += element.cdata
        self.nodeStack.pop()

    def CharacterData(self, data):
        """Expat character-data handler; whitespace-only chunks are ignored."""
        if data.strip():
            element = self.nodeStack[-1]
            element.cdata += data

    def showTree(self):
        """Print the tree, ordering siblings by node identifier."""
        self.tree.show(key=lambda node: node.identifier)
        # In https://github.com/caesar0301/treelib/pull/180 :
        # self.tree.show(sorting=False)

    def toDot(self, file):
        """Export the tree in Graphviz format to ``file``."""
        self.tree.to_graphviz(file)
        # In https://github.com/caesar0301/treelib/pull/179 :
        # self.tree.to_graphviz(file, sorting=False)

    def Parse(self, filename):
        """Parse the XML file ``filename`` and return the root element.

        Returns ``self.root``, which stays ``None`` if no element was seen.
        """
        parser = expat.ParserCreate()
        # Route the Expat events to our handler methods.
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        # Read through a context manager so the file handle is always closed,
        # including when the parser raises on malformed input.
        with open(filename) as xml_file:
            parser.Parse(xml_file.read(), True)
        return self.root
class Blockchain(object):
    """Tree of blocks rooted at a genesis block.

    Every node is identified by its block's ``proofOfWork`` string and hangs
    under the node identified by the block's ``prevBlockHash``.
    """

    def __init__(self, genesis):
        # TODO: figure out if genesis should be passed in or created here
        self.blockCount = 0
        self.blockchain = Tree()
        self.genesis = genesis
        self.addGenesisBlock(genesis)

    def addGenesisBlock(self, genesis):
        """Insert ``genesis`` as the root node of the chain."""
        self.blockchain.create_node(
            "Genesis Block" + " ID: " + genesis.proofOfWork[:12],
            genesis.proofOfWork,
            data=genesis)

    def printBlockchain(self):
        """Print the whole block tree."""
        self.blockchain.show()

    def addBlock(self, block):
        """Attach ``block`` under the node identified by ``block.prevBlockHash``.

        The block counter is only advanced once the tree has accepted the
        node, so a rejected block does not skew later block labels.
        """
        # TODO: run proof of work verification before adding block
        # Add block to chain & return true if POW valid
        # Else return false
        next_count = self.blockCount + 1
        self.blockchain.create_node(
            "Block " + str(next_count) + " ID: " + block.proofOfWork[:12],
            block.proofOfWork,
            parent=block.prevBlockHash,
            data=block)
        self.blockCount = next_count

    def getGenesisID(self):
        """Return the identifier of the tree's root node."""
        return self.blockchain.root

    def getLongestChainBlocks(self):
        """Return ``(count, nodes)`` for the nodes sitting at maximum depth.

        More than one node means the longest chain is currently forked.
        """
        treeDepth = self.blockchain.depth()
        longestPathLeaves = [
            node for node in self.blockchain.all_nodes()
            if self.blockchain.depth(node) == treeDepth
        ]
        return len(longestPathLeaves), longestPathLeaves

    def blockchainLength(self):
        """Return the depth of the tree, i.e. the length of the longest chain."""
        return self.blockchain.depth()

    def numBlocks(self):
        """Return the number of nodes in the tree."""
        return self.blockchain.size()

    def printChain(self, chain):
        """Print ``chain`` showing each node's ``humanID`` data property."""
        chain.show(data_property="humanID")

    def tailBlocks(self, chain):
        """Print the number of leaves of ``chain`` followed by the leaves."""
        leaves = chain.leaves()
        print("Num leaves" + str(len(leaves)))
        print(leaves)

    def checkBlock(self):
        """Placeholder for proof-of-work validation; currently only prints."""
        # Check the proof of work
        # return true if proof of work is valid
        # else return false
        print("printing block")

    def _writeGraphvizSource(self, outfilename):
        """Dump the tree to ``<outfilename>.gv`` and return that path."""
        gv_path = outfilename + '.gv'
        self.blockchain.to_graphviz(filename=gv_path,
                                    shape=u'box',
                                    graph=u'digraph')
        return gv_path

    def createBlockchainGraph(self, outfilename):
        """Write ``<outfilename>.gv`` and render it with graphviz defaults."""
        print("creating graph")
        g = Source.from_file(self._writeGraphvizSource(outfilename))
        g.render()

    def createBlockchainImg(self, outfilename):
        """Write ``<outfilename>.gv`` and render it as a PNG image.

        The Graphviz source that was just written is the file loaded here
        (the ``.png`` path is an output, not a source). graphviz names the
        rendered file after the source, i.e. ``<outfilename>.gv.png``.
        """
        print("creating graph")
        g = Source.from_file(self._writeGraphvizSource(outfilename),
                             format='png')
        g.render()
class Match_base:
    """Base class for token matchers that record their work in a tree.

    Subclasses override ``func_main``; this class supplies the token cursor
    (``index``/``token``/``token_node``), the log of consumed tags
    (``anls_proc``) and the tree that is built while matching.
    """

    def __init__(self):
        self.token_list = None
        self.index = 0
        self.token = ''
        self.token_node = None
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        self.info = ''

    def set_tokenList(self, token_list):
        """Install ``token_list`` and reset all matching state.

        Raises ``IndexError`` when ``token_list`` is empty.
        """
        self.token_list = token_list
        self.index = 0
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        self.info = ''

    def get_next(self, parent):
        """Consume the current token and advance to the next one.

        The current token's tag is logged in ``anls_proc`` and added to the
        tree under ``parent``. Returns the new current tag, or ``'#'`` once
        the token list is exhausted.
        """
        tmp = self.index - len(self.anls_proc)
        if tmp < 0:
            # NOTE(review): the log is ahead of the cursor here (e.g. after
            # '#' was appended); nesting of the increment under this branch
            # is reconstructed from flattened source - confirm.
            tmp = 0
            self.index += 1
        for i in range(tmp + 1):
            position = self.index - tmp + i
            if position < len(self.token_list):
                self.anls_proc.append(self.token_list[position].tag)
        if self.token is not None:
            self.tree.create_node(tag=self.token,
                                  identifier=str(uuid.uuid1()),
                                  parent=parent)
        if self.index >= len(self.token_list) - 1:
            # No token left: switch to the end marker.
            self.index += 1
            self.token = '#'
            self.anls_proc.append(self.token)
            return self.token
        self.index += 1
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]
        return self.token

    def reset_token(self, re_num=-1):
        """Rewind the cursor.

        With the default ``-1`` the cursor returns to the first token and the
        log is cleared; otherwise the cursor moves back ``re_num`` tokens and
        the last ``re_num`` log entries are dropped.
        """
        if re_num == -1:
            self.index = 0
            self.anls_proc.clear()
        else:
            self.index -= re_num
            for _ in range(re_num):
                self.anls_proc.pop()
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]

    def creat_node(self, name, parent):
        """Add a node tagged ``name`` and return its freshly generated id.

        The first node added to an empty tree becomes the root and ``parent``
        is ignored for it.
        """
        iid = str(uuid.uuid1())
        if self.tree.size() == 0:
            self.tree.create_node(tag='{}'.format(name), identifier=iid)
        else:
            self.tree.create_node(tag='{}'.format(name),
                                  identifier=iid,
                                  parent=parent)
        return iid

    def func_main(self, parent):
        """Matching entry point; the base implementation matches nothing."""
        return False

    def is_var(self):
        """Return True if the current token is an identifier, not a keyword."""
        reserved = {
            "void", "main", "short", "long", "int", "double", "float",
            "while", "if", "else", "for", "break", "return"
        }
        return self.token.isidentifier() and self.token not in reserved

    def is_const(self):
        """Return True if the current token consists of digits only."""
        return self.token.isdigit()

    def run(self, flag):
        """Run ``func_main`` and return ``(res, index - 1, tree, info)``.

        When the match succeeded but fewer tags were logged than there are
        tokens, ``info`` receives an 'unmatched char' message, and with a
        truthy ``flag`` the result is downgraded to ``False``.
        """
        # NOTE(review): nesting below is reconstructed from flattened source.
        self.res = self.func_main('root')
        if self.res is True:
            if len(self.token_list) > len(self.anls_proc):
                self.info = 'error: {}, token: {}, row: {}, col: {}\n'.format(
                    'unmatched char', self.token_node.tag,
                    self.token_node.row, self.token_node.col)
                if flag:
                    self.res = False
        if self.index == 0:
            # Keeps the reported position (index - 1) from going negative.
            self.index += 1
        if len(self.info) == 0:
            self.info = 'all ok'
        return self.res, self.index - 1, self.tree, self.info

    def create_dotPic(self, root_dir):
        """Write ``tree.dot`` into ``root_dir`` and render it to PNG.

        ``root_dir`` is created when missing.
        """
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(root_dir, exist_ok=True)
        dot_path = '{}/tree.dot'.format(root_dir)
        self.tree.to_graphviz(filename=dot_path)
        # Context manager so the dot file handle is not leaked.
        with open(dot_path) as dot_file:
            string = dot_file.read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')
class Parser_analyzer:
    """Table-driven LL(1) analyzer for statements that builds an AST tree.

    NEED: expr, the various terminators.
    NOTE: ``int_t`` is a special-case scheme for a backtracking problem of
    the shape ``A -> B int``, ``B -> int b | eps`` that cannot otherwise be
    resolved; it occurs at the ``int_t main()`` position.
    """

    def __init__(self):
        self.Vn = []  # nonterminals
        self.Vt = []  # terminals
        self.table = None  # predictive parsing table, table[Vn idx][Vt idx]
        self.stack_anls = []
        self.stack_toke = []
        self.err_info = []
        self.AST_Tree = Tree()
        self.AST_Tree_root = None
        self.parent_uid = None
        self.node_parent_dict = None
        self.current_anal_scope = 0

    def _load_productions(self, prod_path):
        """Read the production file; return ``{head: [body, ...]}``.

        A line ``HEAD -> a b c`` starts a head (any earlier bodies of that
        head are replaced); a line ``| a b`` adds an alternative to the most
        recent head. New heads are appended to ``self.Vn``. Blank lines are
        skipped.
        """
        prod_set = {}
        head = ''
        with open(prod_path, 'r', encoding='utf-8') as prod_file:
            for raw in prod_file:
                line = raw.strip()
                if not line:
                    continue
                parts = line.split(' ')
                if line[0] != '|':
                    head = parts[0]
                    prod_set[head] = [' '.join(parts[2:]).strip()]
                    if head not in self.Vn:
                        self.Vn.append(head)
                else:
                    prod_set[head].append(' '.join(parts[1:]).strip())
        return prod_set

    @staticmethod
    def _load_first_follow(ff_path):
        """Read the tab-separated FIRST/FOLLOW file.

        Columns: symbol, eps flag, FIRST set (space separated) and, when a
        fourth column is present, FOLLOW set (space separated). Blank lines
        are skipped.
        """
        ff_set = {}
        with open(ff_path, 'r', encoding='utf-8') as ff_file:
            for raw in ff_file:
                if not raw.strip():
                    continue
                cols = raw.replace('\n', '').split('\t')
                ff_set[cols[0]] = {
                    'eps_flag': cols[1],
                    'fi_set': cols[2].split(' '),
                    'fo_set': cols[3].split(' ') if len(cols) == 4 else []
                }
        return ff_set

    def _column_of(self, terminal):
        """Return the table column of ``terminal``, adding one if it is new."""
        if terminal not in self.Vt:
            self.Vt.append(terminal)
            for row in self.table:
                row.append('')
        return self.Vt.index(terminal)

    def load_analyzer(self, prod_path, ff_path):
        """Build ``Vn``, ``Vt`` and the predictive table from two files.

        ``prod_path`` holds the productions and ``ff_path`` the FIRST/FOLLOW
        sets; both are read as UTF-8.
        """
        prod_set = self._load_productions(prod_path)
        ff_set = self._load_first_follow(ff_path)

        self.table = [[] for row in range(len(self.Vn))]  # predictive table
        for item in self.Vn:
            item_prod = prod_set[item]
            item_ff = ff_set[item]
            if item_ff['eps_flag'] == 'true':
                item_ff['fi_set'].remove('eps')
            row = self.Vn.index(item)
            for non in item_ff['fi_set']:
                col = self._column_of(non)
                aim_prod = None
                aim2_prod = None  # fallback: an 'eps ...' production
                for temp_prod in item_prod:
                    symbols = temp_prod.split(' ')
                    temp_first = symbols[0]
                    if temp_first == 'eps' and len(symbols) > 1:
                        aim2_prod = temp_prod
                    if non == temp_first:
                        aim_prod = temp_prod
                        break
                    elif temp_first in ff_set:
                        if (non in ff_set[temp_first]['fi_set']
                                or ff_set[temp_first]['eps_flag'] == 'true'):
                            aim_prod = temp_prod
                            break
                if aim_prod is None:
                    aim_prod = aim2_prod
                self.table[row][col] = aim_prod
            if item_ff['eps_flag'] == 'true':
                # Nullable symbol: every FOLLOW terminal selects 'eps'.
                for non in item_ff['fo_set']:
                    col = self._column_of(non)
                    self.table[row][col] = 'eps'

    def load_stack(self, token_list, start):
        """Reset both stacks for a new run starting at symbol ``start``.

        Tokens are pushed reversed so the first token is on top; both stacks
        have ``'#'`` at the bottom.
        """
        self.stack_anls = ['#', start]
        self.stack_toke = ['#']
        self.stack_toke.extend(reversed(token_list))
        self.err_info = []
        self.node_parent_dict = {start: [None]}

    def table_show(self):
        """Return the predictive table rendered as text."""
        res = "{}\n".format(str(self.Vt))
        for idx, item in enumerate(self.table):
            res += "{}\t".format(self.Vn[idx])
            for idx2, jt in enumerate(item):
                res += "'{}'({}) ".format(jt, self.Vt[idx2])
            res += '\n'
        return res

    def ans_show(self):
        """Print both stacks followed by an empty line."""
        print(self.stack_anls)
        print(self.stack_toke)
        print()

    def creat_node(self, tag, parent, data):
        """Add a node to the AST and return its identifier.

        The first node added to an empty tree becomes the root (``parent`` is
        ignored for it) and is remembered in ``AST_Tree_root``.
        """
        if self.AST_Tree.size() == 0:
            node = self.AST_Tree.create_node(tag='{}'.format(tag), data=data)
            self.AST_Tree_root = node
        else:
            node = self.AST_Tree.create_node(tag='{}'.format(tag),
                                             parent=parent,
                                             data=data)
        return node.identifier

    def create_dotPic(self, root_dir):
        """Write ``tree.dot`` into ``root_dir`` and render it to PNG."""
        # root_dir = './treePic'
        dot_path = '{}/tree.dot'.format(root_dir)
        self.AST_Tree.to_graphviz(filename=dot_path)
        # Context manager so the dot file handle is not leaked.
        with open(dot_path) as dot_file:
            string = dot_file.read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')

    def _take_parent(self, symbol):
        """Pop and return the most recently recorded parent id of ``symbol``."""
        parents = self.node_parent_dict[symbol]
        parent = parents.pop(-1)
        if len(parents) == 0:
            self.node_parent_dict.pop(symbol)
        return parent

    def _record_children(self, symbols):
        """Record ``self.parent_uid`` as pending parent of each of ``symbols``."""
        for item in symbols:
            if item not in self.node_parent_dict:
                self.node_parent_dict[item] = []
            self.node_parent_dict[item].append(self.parent_uid)

    def run(self, log=False):
        """Drive the analysis over the loaded stacks.

        Returns ``(anlsRes, anlsLog)``: the result text (success line or the
        collected error messages) and, when ``log`` is true, a step trace.
        """
        # NOTE(review): branch nesting is reconstructed from flattened source.
        anlsRes = ''
        anlsLog = ''
        toke = self.stack_toke.pop(-1)
        symbol = self.stack_anls.pop(-1)
        while symbol != '#':
            if symbol in [toke.tag, toke.type]:
                # Refresh the scope.
                if symbol == '{':
                    self.current_anal_scope += 1
                elif symbol == '}':
                    self.current_anal_scope -= 1
                else:
                    toke.set_scope(self.current_anal_scope)
                # Refresh the literal value.
                if toke.type == 'num':
                    toke.set_value(toke.tag)
                # Create the node and attach it.
                self.creat_node(symbol, self._take_parent(symbol), toke)
                toke = self.stack_toke.pop(-1)
                if log:
                    anlsLog += "\t*HIT: {}\t<-\t{}\n".format(symbol, toke)
                if toke == '#':
                    break
            elif symbol in self.Vn:
                # Variables and numbers are looked up by type, not by tag.
                if toke.type in ['var', 'num']:
                    lookup = toke.type
                else:
                    lookup = toke.tag
                table_item = self.table[self.Vn.index(symbol)][
                    self.Vt.index(lookup)]
                table_item = table_item.split(' ')
                if table_item[0] == '':
                    # Error: no production for this (symbol, token) pair.
                    anlsLog += "\t*ERROR: {}\t<-\t{}\n".format(symbol, toke)
                    self.err_info.append(
                        "row: {}, col: {}, token: '{}' cont match '{}'\n".
                        format(toke.row, toke.col, toke, symbol))
                elif table_item[0] == 'eps':
                    # A bare 'eps' derives nothing; 'eps X ...' pushes the
                    # remaining symbols under the previous parent.
                    if len(table_item) > 1:
                        temp = list(reversed(table_item))[0:-1]
                        self.stack_anls.extend(temp)
                        # Add to the node -> parent hash table.
                        self._record_children(temp)
                else:
                    # Regular production: push its symbols reversed.
                    temp = list(reversed(table_item))
                    self.stack_anls.extend(temp)
                    # Create the node and attach it.
                    self.parent_uid = self.creat_node(
                        symbol, self._take_parent(symbol), symbol)
                    # Add to the node -> parent hash table.
                    self._record_children(temp)
            if log:
                anlsLog += "\n"
                anlsLog += "symb:'{}'----stack:{}\n".format(
                    symbol, list(reversed(self.stack_anls)))
                anlsLog += "toke:{}----stack:{}\n".format(
                    toke, list(reversed(self.stack_toke)))
            symbol = self.stack_anls.pop(-1)
        self.node_parent_dict.clear()
        if len(self.err_info) == 0:
            anlsRes += "match compete!\n"
        for item in self.err_info:
            anlsRes += "{}".format(item)
        return anlsRes, anlsLog
domain="" subdomain="" domain = tldextract.extract(url).domain subdomain = tldextract.extract(url).subdomain if not (tree.contains(domain)): tree.create_node(domain, domain, parent="ID of root node") #Add domains to root node if subdomain: tree.create_node(subdomain, subdomain+domain, parent=domain) #Add sub-domains to domain node file.close() tree.show(line_type="ascii-emv") #show data as stdout tree.to_graphviz(filename="tree_graphviz") #dump tree as graphviz #dot xxx -Tps -o test.ps -Grankdir=LR #left to right subprocess.call(["dot", "tree_graphviz", "-Tps", "-o" ,"output.ps" ,"-Grankdir=LR"]) #Grankdir=LR option to build tree from left to right #convert -flatten -density 150 -geometry 100% test.ps test.png subprocess.call(["convert" ,"-flatten" ,"-density" ,"150" ,"-geometry" ,"100%" ,"output.ps" , "tree_graphviz.png"],stderr=subprocess.DEVNULL) #convert graphviz to png # rm -rf tree_graphviz output.ps subprocess.call(["rm", "-rf", "tree_graphviz", "output.ps"]) #clear files if os.path.exists("output.txt"): #dump tree as text file subprocess.call(["rm", "-rf", "output.txt"]) tree.save2file('output.txt',line_type="ascii-emv") with open('output.json', 'w') as f: #dump tree as json form f.write(tree.to_json(with_data=True))