Example #1
0
    def all(self, dom, marker):
        """
        Mark every table collected from ``dom`` and record a 0/1 prediction
        for each of them.

        For each table gathered by ``self.tDfs`` the outcome of ``match`` is
        stored on ``table.result``, a prediction is derived from it and from
        the counts returned by ``self.count_table``, and that prediction is
        handed to ``self.c``. The table is then marked regardless of the
        prediction.

        @param dom: DOM root handed to ``Node().loadNodeTree``
        @param marker: Object whose ``mark(dom_node, 'table')`` method is
        called once per collected table
        """
        tree = Node().loadNodeTree(dom, 0)

        # tDfs receives an empty list and the loop below iterates it, so it
        # is presumably filled in place with the table nodes -- TODO confirm.
        itables = []
        self.tDfs(tree, itables)

        for table in itables:
            table.result = match(table, self.maxDist, self.height, self.tags)
            # Going by the names, tr/td look like row and cell counts
            # (unverified); the third returned value is not needed here.
            tr, td, _ = self.count_table(table)

            # Number of truthy entries in the match result.
            matched = sum(1 for entry in table.result if entry)

            # Predict 1 only when at least two entries matched and td
            # exceeds tr (the tr > 0 test also guards the division).
            if matched >= 2 and tr > 0 and td / float(tr) > 1:
                pred = 1
            else:
                pred = 0

            self.c(table, pred)

            # Unconditional: every collected table gets marked.
            marker.mark(table.dom, 'table')
Example #2
0
    def dfs(self, node, maxDist, height, tags):
        """
        Walk the tree depth-first, looking for adjacent nodes whose edit
        distance is below maxDist and grouping them into the same component.

        The outcome of ``match`` is stored on ``node.result``; only children
        whose entry in that result is falsy are descended into.

        @param Node node: Current node of the dfs
        @param float maxDist: Maximum proportion of difference allowed for
        nodes to be grouped into the same component
        @param height: Forwarded unchanged to ``match``
        @param tags: Forwarded unchanged to ``match``
        """

        node.result = match(node, maxDist, height, tags)

        for position, child in enumerate(node.childNodes):
            # A truthy entry means this child is not descended into.
            if node.result[position]:
                continue
            self.dfs(child, maxDist, height, tags)
Example #3
0
    def _mark2(self, dom, marker, postProcess=False):

        tables = dom.getElementsByTagName('table')
        tree = Node().loadNodeTree(dom,0)

        itables = []
        self.tDfs(tree,itables)

        #print 'tables',  len(tables)
        #print 'itables', len(itables)

        for table in itables:
            p = False

            if postProcess:
                (tr,td, t) = self.count_tr_td(table)
            else:
                (tr, td) = 0,1

            if tr > 0 and td/float(tr) > 1:
                p = True

            if p or not postProcess:
                table.result = match(table, self.maxDist,self.height, self.tags)

                #print table.result

                d = {}
                for o in table.result:
                    if o:
                        d.setdefault(o,0)
                        d[o]+=1

                c = 0
                for i in d:
                    c += d[i]

                if postProcess:
                    if c >= 2:
                        print 'mark', td/float(tr)
                        marker.mark(table.dom,'table')
                else:
                    if c >= 2:
                        marker.mark(table.dom,'table')