Esempio n. 1
0
class Xml2Obj:
    '''Transform an XML file into Element objects mirrored in a Tree.

    The three Expat handlers below build the structure while the document
    is parsed: every start tag becomes an Element (and is numbered with a
    running counter), character data is accumulated on the innermost open
    element, and on the end tag that text is appended to the tag of the
    matching tree node.
    '''

    def __init__(self):
        self.root = None        # Element built for the outermost tag
        self.nodeStack = []     # currently open elements, innermost last
        self.numberCount = 0    # number handed to the next Element
        self.tree = Tree()

    def StartElement(self, name, attributes):
        'Expat start element event handler'
        if self.nodeStack:
            # Nested tag: attach it to the innermost open element.
            parent = self.nodeStack[-1]
            element = Element(name, attributes, self.tree,
                              self.numberCount, parent.number)
            parent.addChild(element)
            # Keep working on the tree the parent now holds.
            self.tree = parent.getTree()
        else:
            # First tag of the document: it becomes the root of the tree.
            element = self.root = Element(
                name, attributes, self.tree, self.numberCount, None)
            self.tree.create_node(self.root.name, self.root.number)
        self.numberCount += 1
        self.nodeStack.append(element)

    def EndElement(self, name):
        'Expat end element event handler'
        element = self.nodeStack[-1]
        if element.cdata:
            # Show the element's text next to its name in the tree node tag.
            self.tree.get_node(element.number).tag += element.cdata
        self.nodeStack.pop()

    def CharacterData(self, data):
        '''Expat character data event handler'''
        # Whitespace-only chunks (indentation, newlines) are ignored.
        if data.strip():
            self.nodeStack[-1].cdata += data

    def showTree(self):
        '''Print the tree, ordering siblings by node identifier.'''
        self.tree.show(key=lambda node: node.identifier)
        # In https://github.com/caesar0301/treelib/pull/180 :
        # self.tree.show(sorting=False)

    def toDot(self, file):
        '''Export the tree in Graphviz format to `file`.'''
        self.tree.to_graphviz(file)
        # In https://github.com/caesar0301/treelib/pull/179 :
        # self.tree.to_graphviz(file, sorting=False)

    def Parse(self, filename):
        '''Parse the XML file `filename` and return the root Element.'''
        parser = expat.ParserCreate()
        # Route the Expat events to our handler methods.
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        # Read inside a context manager so the file handle is always closed,
        # even when the parser raises on malformed input.
        with open(filename) as xml_file:
            parser.Parse(xml_file.read(), True)
        return self.root
Esempio n. 2
0
class Blockchain(object):
    """Block tree rooted at a genesis block.

    Blocks are stored as tree nodes identified by their ``proofOfWork``
    string and attached under the node whose identifier equals their
    ``prevBlockHash``, so competing forks appear as sibling branches.
    """

    def __init__(self, genesis):
        # TODO: figure out if genesis should be passed in or created here
        # self.tinput = tinput
        self.blockCount = 0  # number of non-genesis blocks added so far
        self.blockchain = Tree()
        self.genesis = genesis
        self.addGenesisBlock(genesis)  # Add the genesis block to chain

    def addGenesisBlock(self, genesis):
        """Insert `genesis` as the root node of the tree."""
        self.blockchain.create_node("Genesis Block" + " ID: " +
                                    genesis.proofOfWork[:12],
                                    genesis.proofOfWork,
                                    data=genesis)

    def printBlockchain(self):
        """Print the whole block tree."""
        self.blockchain.show()

    def addBlock(self, block):
        """Attach `block` under the node identified by its prevBlockHash."""
        # TODO: run proof of work verification before adding block
        # Add block to chain & return true if POW valid
        # Else return false
        number = self.blockCount + 1
        self.blockchain.create_node("Block " + str(number) + " ID: " +
                                    block.proofOfWork[:12],
                                    block.proofOfWork,
                                    parent=block.prevBlockHash,
                                    data=block)
        # Commit the counter only once the insert succeeded, so a rejected
        # block does not leave a gap in the numbering.
        self.blockCount = number

    def getGenesisID(self):
        """Return the identifier of the tree root."""
        return self.blockchain.root

    def getLongestChainBlocks(self):
        """Return ``(count, nodes)`` for the nodes lying at maximum depth.

        More than one node means several branches tie for longest chain.
        """
        treeDepth = self.blockchain.depth()
        longestPathLeaves = [
            node for node in self.blockchain.all_nodes()
            if self.blockchain.depth(node) == treeDepth
        ]
        return len(longestPathLeaves), longestPathLeaves

    def blockchainLength(self):
        """Return the depth of the tree, i.e. the length of the longest chain."""
        return self.blockchain.depth()

    def numBlocks(self):
        """Return the number of nodes in the tree."""
        return self.blockchain.size()

    def printChain(self, chain):
        """Print `chain`, labelling each node with its data's ``humanID``."""
        chain.show(data_property="humanID")

    def tailBlocks(self, chain):
        """Print how many leaves `chain` has, then the leaves themselves."""
        leaves = chain.leaves()
        print("Num leaves" + str(len(leaves)))
        print(leaves)

    def checkBlock(self):
        # Placeholder: should check the proof of work and return True when
        # it is valid, False otherwise. Currently it only prints.
        print("printing block")

    def _renderGraph(self, outfilename, image_format=None):
        # Write the tree to '<outfilename>.gv' and render that file;
        # image_format=None keeps graphviz's default output format.
        print("creating graph")
        gv_path = outfilename + '.gv'
        self.blockchain.to_graphviz(filename=gv_path,
                                    shape=u'box',
                                    graph=u'digraph')
        g = Source.from_file(gv_path, format=image_format)
        g.render()

    def createBlockchainGraph(self, outfilename):
        """Export the tree to ``<outfilename>.gv`` and render it."""
        self._renderGraph(outfilename)

    def createBlockchainImg(self, outfilename):
        """Export the tree to ``<outfilename>.gv`` and render it as PNG."""
        # The Graphviz source is the .gv file written by the export; the
        # image is an output of the rendering step, never an input to it.
        self._renderGraph(outfilename, image_format='png')
Esempio n. 3
0
class Match_base:
    """Base class for token matchers that record what they consume in a Tree.

    Subclasses override ``func_main``; this base implementation matches
    nothing. The matcher keeps a cursor (``index``/``token``/``token_node``)
    into ``token_list`` and a log of consumed tags in ``anls_proc``.
    """

    def __init__(self):
        self.token_list = None
        self.index = 0
        self.token = ''
        self.token_node = None
        self._reset_state()

    def _reset_state(self):
        # Fresh tree, empty consumption log and cleared result/message.
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        # self.info = []
        self.info = ''

    def set_tokenList(self, token_list):
        """Install `token_list` and rewind the matcher to its first token."""
        self.token_list = token_list
        self.index = 0
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]
        self._reset_state()

    def get_next(self, parent):
        """Consume the current token and move to the next one.

        The current token is added to the tree as a child of `parent`.
        Returns the tag of the new current token, or '#' once the last
        token of the list has been consumed.
        """
        # How many tokens the cursor is ahead of the consumption log.
        lag = self.index - len(self.anls_proc)
        if lag < 0:
            lag = 0
            self.index += 1
        # Log the tags from position index - lag up to index (bounds-checked).
        for offset in range(lag + 1):
            pos = self.index - lag + offset
            if pos < len(self.token_list):
                self.anls_proc.append(self.token_list[pos].tag)
        if self.token is not None:
            self.tree.create_node(tag=self.token,
                                  identifier=str(uuid.uuid1()),
                                  parent=parent)

        at_last_token = self.index >= len(self.token_list) - 1
        self.index += 1
        if at_last_token:
            # '#' marks the end of the input.
            self.token = '#'
            self.anls_proc.append(self.token)
        else:
            self.token = self.token_list[self.index].tag
            self.token_node = self.token_list[self.index]
        return self.token

    def reset_token(self, re_num=-1):
        """Rewind the cursor.

        With the default ``-1`` the matcher goes back to the first token and
        the consumption log is emptied; otherwise the cursor moves back
        `re_num` tokens and the last `re_num` log entries are dropped.
        """
        if re_num == -1:
            self.index = 0
            self.anls_proc.clear()
        else:
            self.index -= re_num
            for _ in range(re_num):
                self.anls_proc.pop()
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]

    def creat_node(self, name, parent):
        """Add a node tagged `name` and return its generated identifier.

        The very first node becomes the root, so `parent` is ignored for it.
        """
        iid = str(uuid.uuid1())
        if self.tree.size() == 0:
            self.tree.create_node(tag='{}'.format(name), identifier=iid)
        else:
            self.tree.create_node(tag='{}'.format(name),
                                  identifier=iid,
                                  parent=parent)
        return iid

    def func_main(self, parent):
        """Matching entry point used by ``run``; the base class matches nothing."""
        return False

    def is_var(self):
        """Return True if the current token is an identifier and not reserved."""
        reserved = {
            "void", "main", "short", "long", "int", "double", "float",
            "while", "if", "else", "for", "break", "return"
        }
        return self.token.isidentifier() and self.token not in reserved

    def is_const(self):
        """Return True if the current token consists only of digits."""
        return self.token.isdigit()

    def run(self, flag):
        """Run ``func_main`` and return ``(result, index - 1, tree, info)``.

        When the match succeeded but fewer tags were consumed than the list
        holds, an 'unmatched char' message is stored in ``info``; the result
        is additionally forced to False only if `flag` is truthy.
        """
        self.res = self.func_main('root')
        if self.res is True:
            if len(self.token_list) > len(self.anls_proc):
                self.info = 'error: {}, token: {}, row: {}, col: {}\n'.format(
                    'unmatched char', self.token_node.tag, self.token_node.row,
                    self.token_node.col)
                if flag:
                    self.res = False
        # Keep the reported position (index - 1) from going negative.
        if self.index == 0:
            self.index += 1
        if len(self.info) == 0:
            self.info = 'all ok'
        return self.res, self.index - 1, self.tree, self.info

    def create_dotPic(self, root_dir):
        """Write ``tree.dot`` into `root_dir` and render it to ``tree.png``."""
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(root_dir, exist_ok=True)
        dot_path = '{}/tree.dot'.format(root_dir)
        self.tree.to_graphviz(filename=dot_path)
        # Read through a context manager so the handle is closed promptly.
        with open(dot_path) as dot_file:
            string = dot_file.read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')
Esempio n. 4
0
class Parser_analyzer:
    """
    Table-driven LL(1) analyzer for statements.
    NEED: expr, the various terminators
    NOTE: int_t is a special-case workaround for a backtracking problem of
    the form
        A -> B int
        B -> int b | ϵ
    that could not be resolved otherwise; it occurs at the int_t main()
    position.
    """
    def __init__(self):
        self.Vn = []  # non-terminals
        self.Vt = []  # terminals
        self.table = None  # predictive parsing table, table[Vn idx][Vt idx]
        self.stack_anls = []  # analysis (symbol) stack
        self.stack_toke = []  # input token stack, next token on top

        self.err_info = []

        self.AST_Tree = Tree()
        self.AST_Tree_root = None
        self.parent_uid = None
        self.node_parent_dict = None  # symbol -> pending parent node ids

        self.current_anal_scope = 0

    def load_analyzer(self, prod_path, ff_path):
        """Build ``Vn``, ``Vt`` and the predictive table from two files.

        `prod_path` holds the productions, one per line and split on single
        spaces: the first field is the head, the second is skipped, the rest
        is the body; a line starting with ``|`` adds another body to the
        previous head. `ff_path` is tab-separated: symbol, eps flag
        (``true`` when the symbol can derive eps), space-separated FIRST
        set and, optionally, space-separated FOLLOW set. Blank lines in
        either file are ignored.
        """
        prod_set = {}
        temp_prod = ''
        # Context managers make sure both input files are closed.
        with open(prod_path, 'r', encoding='utf-8') as prod_file:
            prod_lines = prod_file.readlines()
        for item in prod_lines:
            item = item.strip()
            if not item:
                # A blank line used to crash on item[0]; just skip it.
                continue
            temp = item.split(' ')
            if item[0] != '|':
                temp_prod = temp[0]
                prod_set[temp_prod] = [' '.join(temp[2:]).strip()]
                if temp_prod not in self.Vn:
                    self.Vn.append(temp_prod)
            else:
                prod_set[temp_prod].append(' '.join(temp[1:]).strip())

        ff_set = {}
        with open(ff_path, 'r', encoding='utf-8') as ff_file:
            ff_lines = ff_file.readlines()
        for item in ff_lines:
            item = item.replace('\n', '')
            if not item.strip():
                continue
            item = item.split('\t')

            end_symbol = item[0]
            eps_flag = item[1]
            fi_set = item[2].split(' ')
            if len(item) == 4:
                fo_set = item[3].split(' ')
            else:
                fo_set = []

            ff_set[end_symbol] = {
                'eps_flag': eps_flag,
                'fi_set': fi_set,
                'fo_set': fo_set
            }

        self.table = [[] for _ in self.Vn]  # predictive parsing table

        # Vn holds no duplicates, so the enumerate index is the row index.
        for row_idx, item in enumerate(self.Vn):
            item_prod = prod_set[item]
            item_ff = ff_set[item]
            row = self.table[row_idx]

            if item_ff['eps_flag'] == 'true':
                item_ff['fi_set'].remove('eps')
            for non in item_ff['fi_set']:
                if non not in self.Vt:
                    # New terminal: add an empty column to every row.
                    self.Vt.append(non)
                    for n in range(len(self.Vn)):
                        self.table[n].append('')
                aim_prod = None
                aim2_prod = None  # fallback: a body of the form 'eps ...'
                for temp_prod in item_prod:
                    symbols = temp_prod.split(' ')
                    temp_first = symbols[0]
                    if temp_first == 'eps' and len(symbols) > 1:
                        aim2_prod = temp_prod
                    if non == temp_first:
                        aim_prod = temp_prod
                        break
                    elif temp_first in ff_set:
                        if non in ff_set[temp_first]['fi_set'] or ff_set[
                                temp_first]['eps_flag'] == 'true':
                            aim_prod = temp_prod
                            break

                if aim_prod is None:
                    aim_prod = aim2_prod
                row[self.Vt.index(non)] = aim_prod
            if item_ff['eps_flag'] == 'true':
                # eps-deriving symbols take the 'eps' entry on their FOLLOW set.
                for non in item_ff['fo_set']:
                    if non not in self.Vt:
                        self.Vt.append(non)
                        for n in range(len(self.Vn)):
                            self.table[n].append('')
                    row[self.Vt.index(non)] = 'eps'

    def load_stack(self, token_list, start):
        """Reset both stacks for a new run over `token_list` from `start`."""
        self.stack_anls = ['#', start]

        # Reversed so that pop() yields the tokens in input order; '#' at
        # the bottom marks the end of the input.
        self.stack_toke = ['#']
        self.stack_toke.extend(reversed(token_list))

        self.err_info = []

        self.node_parent_dict = {start: [None]}

    def table_show(self):
        """Return the terminals line followed by one text line per table row."""
        parts = ["{}\n".format(str(self.Vt))]
        for name, row in zip(self.Vn, self.table):
            parts.append("{}\t".format(name))
            for entry, terminal in zip(row, self.Vt):
                parts.append("'{}'({}) ".format(entry, terminal))
            parts.append('\n')
        return ''.join(parts)

    def ans_show(self):
        """Print both stacks followed by an empty line."""
        print(self.stack_anls)
        print(self.stack_toke)
        print()

    def creat_node(self, tag, parent, data):
        """Add an AST node and return its identifier.

        The first node created becomes the root (`parent` is ignored).
        """
        if self.AST_Tree.size() == 0:
            node = self.AST_Tree.create_node(tag='{}'.format(tag), data=data)
            self.AST_Tree_root = node
        else:
            node = self.AST_Tree.create_node(tag='{}'.format(tag),
                                             parent=parent,
                                             data=data)
        return node.identifier

    def create_dotPic(self, root_dir):
        """Write ``tree.dot`` into `root_dir` and render it to ``tree.png``."""
        # root_dir = './treePic'
        dot_path = '{}/tree.dot'.format(root_dir)
        self.AST_Tree.to_graphviz(filename=dot_path)
        # Read through a context manager so the handle is closed promptly.
        with open(dot_path) as dot_file:
            string = dot_file.read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')

    def run(self, log=False):
        """Run the table-driven analysis over the loaded stacks.

        Returns ``(anlsRes, anlsLog)``: the result text (either the success
        line or the collected error messages) and the trace text. HIT and
        stack lines are only traced when `log` is truthy; ERROR lines are
        always traced.
        """
        anlsRes = ''
        anlsLog = ''
        toke = self.stack_toke.pop()
        symbol = self.stack_anls.pop()
        while symbol != '#':
            if symbol in [toke.tag, toke.type]:
                # Update the scope
                if symbol == '{':
                    self.current_anal_scope += 1
                elif symbol == '}':
                    self.current_anal_scope -= 1
                else:
                    toke.set_scope(self.current_anal_scope)
                # Update the literal value
                if toke.type == 'num':
                    toke.set_value(toke.tag)
                # Create the node and add it to the tree
                self.creat_node(symbol, self.node_parent_dict[symbol][-1],
                                toke)
                self.node_parent_dict[symbol].pop()
                if len(self.node_parent_dict[symbol]) == 0:
                    self.node_parent_dict.pop(symbol)
                toke = self.stack_toke.pop()
                if log:
                    # print('\t*HIT: {}\t<-\t{}'.format(symbol, toke))
                    anlsLog += "\t*HIT: {}\t<-\t{}\n".format(symbol, toke)
                if toke == '#':
                    break
            elif symbol in self.Vn:
                if toke.type in ['var', 'num']:  # variables/numbers: look up by type
                    table_item = self.table[self.Vn.index(symbol)][
                        self.Vt.index(toke.type)]
                else:
                    table_item = self.table[self.Vn.index(symbol)][
                        self.Vt.index(toke.tag)]
                table_item = table_item.split(' ')
                if table_item[0] == '':  # error: empty table entry
                    # print('\t*ERROR: {}\t<-\t{}'.format(symbol, toke))
                    anlsLog += "\t*ERROR: {}\t<-\t{}\n".format(symbol, toke)
                    self.err_info.append(
                        "row: {}, col: {}, token: '{}' cont match '{}'\n".
                        format(toke.row, toke.col, toke, symbol))
                elif table_item[0] == 'eps':  # epsilon entry
                    if len(table_item) > 1:  # 'eps' followed by real symbols
                        # Push the body reversed, dropping the leading 'eps'.
                        temp = list(reversed(table_item))[0:-1]
                        self.stack_anls.extend(temp)
                        # Record node -> parent mapping
                        for item in temp:
                            if item not in self.node_parent_dict:
                                self.node_parent_dict[item] = []
                            self.node_parent_dict[item].append(self.parent_uid)
                else:  # regular production
                    temp = list(reversed(table_item))
                    self.stack_anls.extend(temp)
                    # Create the node and add it to the tree
                    self.parent_uid = self.creat_node(
                        symbol, self.node_parent_dict[symbol][-1], symbol)
                    self.node_parent_dict[symbol].pop()
                    if len(self.node_parent_dict[symbol]) == 0:
                        self.node_parent_dict.pop(symbol)
                    # Record node -> parent mapping
                    for item in temp:
                        if item not in self.node_parent_dict:
                            self.node_parent_dict[item] = []
                        self.node_parent_dict[item].append(self.parent_uid)
                    if log:
                        # print()
                        # print("symb:\'{}\'----stack:{}".format(symbol, list(reversed(self.stack_anls))))
                        # print("toke:{}----stack:{}".format(toke, list(reversed(self.stack_toke))))
                        anlsLog += "\n"
                        anlsLog += "symb:\'{}\'----stack:{}\n".format(
                            symbol, list(reversed(self.stack_anls)))
                        anlsLog += "toke:{}----stack:{}\n".format(
                            toke, list(reversed(self.stack_toke)))
            symbol = self.stack_anls.pop()
        self.node_parent_dict.clear()
        # self.ans_show()
        if len(self.err_info) == 0:
            # print('match compete!')
            anlsRes += "match compete!\n"
        for item in self.err_info:
            anlsRes += "{}".format(item)
        return anlsRes, anlsLog
Esempio n. 5
0
    # Split the URL once and reuse the result for both parts.
    extracted = tldextract.extract(url)
    domain = extracted.domain
    subdomain = extracted.subdomain
    if not tree.contains(domain):
        tree.create_node(domain, domain, parent="ID of root node")  # Add domains to root node
    if subdomain:
        subdomain_id = subdomain + domain
        # The same host can show up in several URLs; adding its node a
        # second time would collide with the existing identifier.
        if not tree.contains(subdomain_id):
            tree.create_node(subdomain, subdomain_id, parent=domain)  # Add sub-domains to domain node



file.close()

# Print the tree to stdout.
tree.show(line_type="ascii-emv")

# Dump the tree in Graphviz format, lay it out left-to-right as PostScript
# with `dot`, then flatten it into a PNG with ImageMagick's `convert`.
tree.to_graphviz(filename="tree_graphviz")
dot_command = ["dot", "tree_graphviz", "-Tps", "-o", "output.ps", "-Grankdir=LR"]
subprocess.call(dot_command)
convert_command = [
    "convert", "-flatten", "-density", "150", "-geometry", "100%",
    "output.ps", "tree_graphviz.png",
]
subprocess.call(convert_command, stderr=subprocess.DEVNULL)
# Remove the intermediate files.
cleanup_command = ["rm", "-rf", "tree_graphviz", "output.ps"]
subprocess.call(cleanup_command)


# Dump the tree as a text file, deleting any previous dump first
# (presumably because save2file appends to an existing file; verify
# against the treelib version in use).
if os.path.exists("output.txt"):
    subprocess.call(["rm", "-rf", "output.txt"])
tree.save2file('output.txt', line_type="ascii-emv")
# Dump the tree, including node data, as JSON.
with open('output.json', 'w') as json_file:
    json_file.write(tree.to_json(with_data=True))