def get_title(self):
        """
        Extract the title of the Government Law Proposal.

        The result is computed once and cached in ``self._title``; later
        calls return the cached value without touching the document again.

        The title is taken from the last block that ``asblocks`` returns for
        the text of page 0.  That block is joined into one string, passed
        through ``reverse_numbers`` and cut at the first ``'. '`` (a period
        followed by a space).  When no such separator exists the whole text
        is kept.  Surrounding whitespace is stripped and newlines are
        replaced by spaces.

        Example title produced:
        http://www.knesset.gov.il/Laws/Data/BillGoverment/538/538.pdf

        2010.-ע"שתה ,(םיילילפה םיכילהה לועיי) (66 'סמ ןוקית) ילילפה ןידה רדס קוח תעצה
        """
        if self._title is None:
            all_title = reverse_numbers(''.join(asblocks(self.get_page_text(0))[-1]))
            # partition() keeps the full text when '. ' is absent.  The
            # previous find()-based slice used -1 in that case and silently
            # dropped the last character of the title.
            head = all_title.partition('. ')[0]
            self._title = head.strip().replace('\n', ' ')
        return self._title
Exemple #2
0
    def get_title(self):
        """
        Extract the title of the Government Law Proposal.

        The value is cached in ``self._title`` after the first call.  It is
        derived from the last block ``asblocks`` returns for the text of
        page 0: the block is joined into a single string, run through
        ``reverse_numbers`` and truncated at the first ``'. '``.  If that
        separator does not occur, the complete text is used.  The result is
        stripped and its newlines are turned into spaces.

        Example title produced:
        http://www.knesset.gov.il/Laws/Data/BillGoverment/538/538.pdf

        2010.-ע"שתה ,(םיילילפה םיכילהה לועיי) (66 'סמ ןוקית) ילילפה ןידה רדס קוח תעצה
        """
        if self._title is None:
            last_block = asblocks(self.get_page_text(0))[-1]
            all_title = reverse_numbers(''.join(last_block))
            end = all_title.find('. ')
            if end == -1:
                # No separator: keep everything.  Slicing with -1 would
                # chop off the final character of the title.
                end = len(all_title)
            self._title = all_title[:end].strip().replace('\n', ' ')
        return self._title
Exemple #3
0
def num_blocks(filename, x=0, y=0, W=0, H=0):
    """Return how many blocks ``asblocks`` produces for ``filename``.

    ``x``, ``y``, ``W`` and ``H`` are forwarded to ``asblocks`` unchanged as
    keyword arguments; their meaning is defined there.
    """
    forwarded = dict(x=x, y=y, W=W, H=H)
    blocks = asblocks(filename, **forwarded)
    return len(blocks)
Exemple #4
0
def isempty(filename, x=0, y=0, W=0, H=0):
    """Return True when ``asblocks`` yields zero blocks for ``filename``.

    ``x``, ``y``, ``W`` and ``H`` are passed positionally to ``asblocks``
    as-is; their meaning is defined there.
    """
    blocks = asblocks(filename, x, y, W, H)
    return not len(blocks)
Exemple #5
0
    numbers = set('pages file_size'.split())

    def convert(k, v):
        # Values whose key is listed in ``numbers`` are reduced to their
        # first whitespace-separated token and parsed as an int; every
        # other value is handed back untouched.
        if k not in numbers:
            return v
        first_token = v.split()[0]
        return int(first_token)

    data = [(k, convert(k, v.strip()))
            for k, v in ((camel_to_lower_case(k), v.strip()) for k, v in (
                l.split(':', 1) for l in capture_output([PDFINFO, filename])))]
    pdfinfo.__dict__.update(data)
    return pdfinfo


def isempty(filename, x=0, y=0, W=0, H=0):
    """Report whether ``asblocks`` finds no blocks at all.

    All arguments are handed to ``asblocks`` positionally and unchanged.
    """
    block_count = len(asblocks(filename, x, y, W, H))
    return block_count == 0


def num_blocks(filename, x=0, y=0, W=0, H=0):
    """Count the blocks ``asblocks`` returns.

    ``x``, ``y``, ``W`` and ``H`` are forwarded as keyword arguments.
    """
    found = asblocks(filename, x=x, y=y, W=W, H=H)
    return len(found)


if __name__ == '__main__':
    # Manual experiment only - not used by the rest of the module.
    # Runs pdftotext() and checksum() on '538.pdf' for x = 0..99 with
    # W=1000, then reports each index whose checksum differs from the
    # checksum that follows it.
    filename = '538.pdf'
    fulltext = asblocks(filename)
    texts = []
    for x in xrange(100):
        texts.append(pdftotext(filename, x=x, W=1000))
    checksums = []
    for x in xrange(100):
        checksums.append(checksum(filename, x=x, W=1000))
    for i, (current, following) in enumerate(zip(checksums, checksums[1:])):
        if current != following:
            print("change at %s" % i)
def asreversed_number_blocks(text):
    """Apply ``reverse_numbers`` to every item of ``text``, strip it, and
    return what ``asblocks`` produces for the resulting list."""
    cleaned = []
    for line in text:
        cleaned.append(reverse_numbers(line).strip())
    return asblocks(cleaned)
def num_blocks(filename, x=0, y=0, W=0, H=0):
    """Return the length of the ``asblocks`` result for ``filename``.

    The remaining arguments are forwarded to ``asblocks`` by keyword.
    """
    options = {'x': x, 'y': y, 'W': W, 'H': H}
    return len(asblocks(filename, **options))
def isempty(filename, x=0, y=0, W=0, H=0):
    """Return True if ``asblocks`` returns nothing for ``filename``.

    The remaining arguments are forwarded to ``asblocks`` positionally.
    """
    blocks = asblocks(filename, x, y, W, H)
    return not len(blocks)
                self.filename, self.pages, self.file_size, self.mod_date)
        __repr__ = __str__
    pdfinfo = PdfInfo()
    pdfinfo.filename = filename
    numbers = set('pages file_size'.split())
    def convert(k, v):
        # Only keys present in ``numbers`` get numeric treatment: the first
        # whitespace-separated token of the value is parsed as an int.
        # Anything else is returned as received.
        if k not in numbers:
            return v
        leading = v.split()[0]
        return int(leading)
    data = [(k, convert(k, v.strip())) for k, v in
            ((camel_to_lower_case(k), v.strip()) for k,v in
            (l.split(':',1) for l in capture_output([PDFINFO, filename])))]
    pdfinfo.__dict__.update(data)
    return pdfinfo

def isempty(filename, x=0, y=0, W=0, H=0):
    """Tell whether the ``asblocks`` result for ``filename`` has length 0.

    ``x``, ``y``, ``W`` and ``H`` go to ``asblocks`` positionally, unchanged.
    """
    count = len(asblocks(filename, x, y, W, H))
    return count == 0

def num_blocks(filename, x=0, y=0, W=0, H=0):
    """Return the number of blocks reported by ``asblocks``.

    ``x``, ``y``, ``W`` and ``H`` are handed over as keyword arguments.
    """
    blocks = asblocks(filename, x=x, y=y, W=W, H=H)
    return len(blocks)

if __name__ == '__main__':
    # Scratch test code - not used elsewhere.
    # For x in 0..99 (W fixed at 1000) collect the pdftotext() output and
    # the checksum() of '538.pdf', then print every index at which two
    # neighbouring checksums differ.
    filename = '538.pdf'
    fulltext = asblocks(filename)
    texts = []
    for x in xrange(100):
        texts.append(pdftotext(filename, x=x, W=1000))
    checksums = []
    for x in xrange(100):
        checksums.append(checksum(filename, x=x, W=1000))
    for i, (current, following) in enumerate(zip(checksums, checksums[1:])):
        if current != following:
            print("change at %s" % i)

Exemple #10
0
def asreversed_number_blocks(text):
    """Run each item of ``text`` through ``reverse_numbers`` and ``strip()``,
    then return the ``asblocks`` result for the list of processed items."""
    processed = []
    for item in text:
        stripped = reverse_numbers(item).strip()
        processed.append(stripped)
    return asblocks(processed)