コード例 #1
14
    def lineDiff(self, lines):
        """Return the most common 'base' step between neighbouring lines.

        Each adjacent pair in ``lines`` is run through
        ``common.lineExtract`` and the difference of the two ``'base'``
        values (following line minus current line) is recorded.  The list
        of differences is passed to ``common.mostCommon`` and that result
        is returned.
        """
        deltas = []
        pair_count = len(lines) - 1
        idx = 0
        while idx < pair_count:
            current = lines[idx]
            following = lines[idx + 1]

            current_info = common.lineExtract(current)
            following_info = common.lineExtract(following)
            deltas.append(following_info['base'] - current_info['base'])
            idx += 1

        return common.mostCommon(deltas)
コード例 #2
0
    def lineSummary(self, line):
        """Summarise the TOKEN elements of ``line`` as attributes on ``line``.

        Every element yielded by ``line.iter('TOKEN')`` is passed through
        ``common.tokenExtract``.  From the extracted values the method sets
        these attributes on ``line`` (all converted with ``unicode``):

        * ``base`` / ``height`` -- ``common.largest`` of the collected values
          when there are at most two tokens, otherwise ``common.mostCommon``.
        * ``top`` -- ``common.smallest`` when there are at most two tokens,
          otherwise ``common.mostCommon``.
        * ``left`` / ``right`` -- the smallest ``'left'`` and the greatest
          ``'right'`` seen across the tokens.
        * ``chars`` -- the sum of the tokens' ``'chars'`` values.

        If no TOKEN is found, ``left`` and ``right`` stay ``None`` and are
        written as the text ``None``; the helpers are then called with
        empty lists.
        """
        base = []
        top = []
        height = []
        left = None
        right = None
        chars = 0

        for token in line.iter('TOKEN'):
            info = common.tokenExtract(token)
            base.append(info['base'])
            top.append(info['top'])
            height.append(info['height'])

            # Widen the horizontal extent; ``is None`` marks "nothing seen yet".
            if left is None or info['left'] < left:
                left = info['left']
            if right is None or info['right'] > right:
                right = info['right']

            chars += info['chars']

        # apply summary
        # base, top and height all get one entry per token, so one length
        # check covers the three of them.
        few_tokens = len(base) <= 2

        if few_tokens:
            line.set('base', unicode(common.largest(base)))
        else:
            line.set('base', unicode(common.mostCommon(base)))

        if few_tokens:
            line.set('top', unicode(common.smallest(top)))
        else:
            line.set('top', unicode(common.mostCommon(top)))

        line.set('left', unicode(left))
        line.set('right', unicode(right))

        if few_tokens:
            line.set('height', unicode(common.largest(height)))
        else:
            line.set('height', unicode(common.mostCommon(height)))
        line.set('chars', unicode(chars))
コード例 #3
0
ファイル: lineExtractors.py プロジェクト: KasaiDot/pdf2epub
    def lineSummary(self, line):
        """Summarise the TOKEN elements of ``line`` as attributes on ``line``.

        Every element yielded by ``line.iter('TOKEN')`` is passed through
        ``common.tokenExtract``.  From the extracted values the method sets
        these attributes on ``line`` (all converted with ``unicode``):

        * ``base`` / ``height`` -- ``common.largest`` of the collected values
          when there are at most two tokens, otherwise ``common.mostCommon``.
        * ``top`` -- ``common.smallest`` when there are at most two tokens,
          otherwise ``common.mostCommon``.
        * ``left`` / ``right`` -- the smallest ``'left'`` and the greatest
          ``'right'`` seen across the tokens.
        * ``chars`` -- the sum of the tokens' ``'chars'`` values.

        If no TOKEN is found, ``left`` and ``right`` stay ``None`` and are
        written as the text ``None``; the helpers are then called with
        empty lists.
        """
        base = []
        top = []
        height = []
        left = None
        right = None
        chars = 0

        for token in line.iter('TOKEN'):
            info = common.tokenExtract(token)
            base.append(info['base'])
            top.append(info['top'])
            height.append(info['height'])

            # Widen the horizontal extent; ``is None`` marks "nothing seen yet".
            if left is None or info['left'] < left:
                left = info['left']
            if right is None or info['right'] > right:
                right = info['right']

            chars += info['chars']

        # apply summary
        # base, top and height all get one entry per token, so one length
        # check covers the three of them.
        few_tokens = len(base) <= 2

        if few_tokens:
            line.set('base', unicode(common.largest(base)))
        else:
            line.set('base', unicode(common.mostCommon(base)))

        if few_tokens:
            line.set('top', unicode(common.smallest(top)))
        else:
            line.set('top', unicode(common.mostCommon(top)))

        line.set('left', unicode(left))
        line.set('right', unicode(right))

        if few_tokens:
            line.set('height', unicode(common.largest(height)))
        else:
            line.set('height', unicode(common.mostCommon(height)))
        line.set('chars', unicode(chars))
コード例 #4
0
    def lineDiff(self, lines):
        """Find the most frequent 'base' difference between adjacent lines.

        For each pair of consecutive entries in ``lines`` the ``'base'``
        value reported by ``common.lineExtract`` for the earlier entry is
        subtracted from that of the later entry.  The differences are
        given to ``common.mostCommon``, whose result is returned.
        """
        steps = []
        # Walk by the index of the *second* member of each pair.
        for nxt in xrange(1, len(lines)):
            first = common.lineExtract(lines[nxt - 1])
            second = common.lineExtract(lines[nxt])
            steps.append(second['base'] - first['base'])

        return common.mostCommon(steps)
コード例 #5
0
    def estimateJustify(self, paragraph, c):
        """Pick a representative 'right' value for a paragraph's children.

        The ``'right'`` value from ``common.lineExtract`` is gathered for
        every child returned by ``paragraph.getchildren()``.  With more
        than two children the result of ``common.mostCommon`` is returned;
        otherwise the result of ``common.largest``.

        ``c`` is accepted but not used by this method.
        """
        # NOTE(review): getchildren() is marked deprecated in the
        # ElementTree/lxml docs -- confirm the element type before
        # switching to list(paragraph).
        edges = [common.lineExtract(child)['right']
                 for child in paragraph.getchildren()]

        if len(edges) > 2:
            return common.mostCommon(edges)
        return common.largest(edges)
コード例 #6
0
    def estimateJustify(self, paragraph, c):
        """Return a summary of the 'right' values of a paragraph's children.

        Each child from ``paragraph.getchildren()`` is run through
        ``common.lineExtract`` and its ``'right'`` value is collected.  Up
        to two values are summarised with ``common.largest``; three or
        more with ``common.mostCommon``.

        ``c`` is accepted but not used by this method.
        """
        right_values = []
        for child in paragraph.getchildren():
            extracted = common.lineExtract(child)
            right_values.append(extracted['right'])

        summarise = common.largest if len(right_values) <= 2 else common.mostCommon
        return summarise(right_values)
コード例 #7
0
    def paragraphSummary(self, para):
        """Summarise the LINE elements of ``para`` as attributes on ``para``.

        Every element yielded by ``para.iter('LINE')`` is passed through
        ``common.lineExtract``.  The method then sets these attributes on
        ``para`` (all converted with ``unicode``):

        * ``base`` / ``right`` -- the greatest ``'base'`` / ``'right'`` seen.
        * ``top`` / ``left`` -- the smallest ``'top'`` / ``'left'`` seen.
        * ``chars`` -- the sum of the lines' ``'chars'`` values.
        * ``height`` -- ``common.mostCommon`` of the lines' ``'height'``
          values.
        * ``lines`` -- the number of LINE elements visited.

        If no LINE is found, the four extent values stay ``None`` and are
        written as the text ``None``; ``common.mostCommon`` is then called
        with an empty list.
        """
        base = None
        top = None
        left = None
        right = None
        chars = 0
        lines = 0
        height = []

        for line in para.iter('LINE'):
            info = common.lineExtract(line)

            # ``is None`` marks "nothing seen yet"; afterwards keep the
            # running extreme for each edge.
            if base is None or info['base'] > base:
                base = info['base']
            if top is None or info['top'] < top:
                top = info['top']
            if left is None or info['left'] < left:
                left = info['left']
            if right is None or info['right'] > right:
                right = info['right']

            chars += info['chars']
            height.append(info['height'])
            lines += 1

        # apply summary
        para.set('base', unicode(base))
        para.set('top', unicode(top))
        para.set('left', unicode(left))
        para.set('right', unicode(right))
        para.set('chars', unicode(chars))
        para.set('height', unicode(common.mostCommon(height)))
        para.set('lines', unicode(lines))
コード例 #8
0
    def paragraphSummary(self, para):
        """Summarise the LINE elements of ``para`` as attributes on ``para``.

        Every element yielded by ``para.iter('LINE')`` is passed through
        ``common.lineExtract``.  The method then sets these attributes on
        ``para`` (all converted with ``unicode``):

        * ``base`` / ``right`` -- the greatest ``'base'`` / ``'right'`` seen.
        * ``top`` / ``left`` -- the smallest ``'top'`` / ``'left'`` seen.
        * ``chars`` -- the sum of the lines' ``'chars'`` values.
        * ``height`` -- ``common.mostCommon`` of the lines' ``'height'``
          values.
        * ``lines`` -- the number of LINE elements visited.

        If no LINE is found, the four extent values stay ``None`` and are
        written as the text ``None``; ``common.mostCommon`` is then called
        with an empty list.
        """
        base = None
        top = None
        left = None
        right = None
        chars = 0
        lines = 0
        height = []

        for line in para.iter('LINE'):
            info = common.lineExtract(line)

            # ``is None`` marks "nothing seen yet"; afterwards keep the
            # running extreme for each edge.
            if base is None or info['base'] > base:
                base = info['base']
            if top is None or info['top'] < top:
                top = info['top']
            if left is None or info['left'] < left:
                left = info['left']
            if right is None or info['right'] > right:
                right = info['right']

            chars += info['chars']
            height.append(info['height'])
            lines += 1

        # apply summary
        para.set('base', unicode(base))
        para.set('top', unicode(top))
        para.set('left', unicode(left))
        para.set('right', unicode(right))
        para.set('chars', unicode(chars))
        para.set('height', unicode(common.mostCommon(height)))
        para.set('lines', unicode(lines))