Ejemplo n.º 1
0
 def all(self, firstpage=1):
     """Build a Page for every page from ``firstpage`` to the end of the file.

     ``firstpage`` is the first page number to include (default 1). The
     page total is read with ``PDFDocument(self.filename).count_pages()``,
     the selection is reported through ``self._l``, and one ``Page`` per
     page number is returned in ascending order.
     """
     total = PDFDocument(self.filename).count_pages()
     page_numbers = list(xrange(firstpage, total + 1))
     self._l.info("%s - %d pages. All chosen", self.filename, total)
     result = []
     for number in page_numbers:
         result.append(Page(self.filename, number, self.c))
     return result
Ejemplo n.º 2
0
 def randompages(self, count, firstpage=1):
     """Pick up to ``count`` random pages from ``firstpage`` onwards.

     Args:
         count: maximum number of pages to pick.
         firstpage: first page number eligible for selection (default 1).

     Returns:
         A list of ``Page`` objects for the chosen page numbers, in
         ascending page order. The list is empty when ``firstpage`` lies
         beyond the last page.

     Raises:
         ValueError: propagated from ``random.sample`` when ``count`` is
             negative.
     """
     total = PDFDocument(self.filename).count_pages()
     candidates = list(xrange(firstpage, total + 1))
     # Bound the sample size by the real number of candidates. The former
     # ``total - firstpage + 1`` went negative once firstpage > total + 1,
     # which made random.sample raise ValueError instead of choosing nothing.
     sample_size = min(len(candidates), count)
     chosen_pages = sorted(random.sample(candidates, sample_size))
     self._l.info("%s - %d pages. %s chosen", self.filename, total, chosen_pages)
     return [Page(self.filename, number, self.c) for number in chosen_pages]
Ejemplo n.º 3
0
sys.path.append("..")

from sheetmusic import *
from gamera.core import *

# Initialise Gamera (init_gamera presumably comes from the gamera.core star
# import above -- confirm).
init_gamera()
# Ghostscript console executable. Written with forward slashes and converted
# to backslashes; the embedded double quotes stay in the value, presumably so
# the space in "Program Files" survives when the path is joined into a
# command string.
gspath = '"c:/Program Files/gs/gs8.70/bin/gswin32c.exe"'.replace("/", "\\")
# Log line layout: timestamp, level, logger name and function, then message.
FORMAT = "%(asctime)-15s %(levelname)s [%(name)s.%(funcName)s]  %(message)s"
# Send DEBUG-and-above records to convert.log using that layout.
logging.basicConfig(level=logging.DEBUG, format=FORMAT, filename="convert.log")


for file in sys.argv[1:]:
    print "Converting %s" % file
    start = time.time()
    try:
        doc = PDFDocument(file)
        pages = doc.count_pages()
        chosen_pages = random.sample([i for i in xrange(1, pages + 1)], min(pages, 10))
        chosen_pages.sort()
        logging.info("%s - %d pages. %s chosen", file, pages, chosen_pages)
        dirname = file[:-4]
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        for page in chosen_pages:
            sys.stdout.flush()
            outfile = "%s/%s-page%d.tif" % (dirname, file[:-4], page)
            outfile_staves = "%s/%s-page%d-staves.tif" % (dirname, file[:-4], page)
            outfile_gamscript = "%s-page%d-class.py" % (file[:-4], page)
            cmd = " ".join(
                [
                    gspath,
Ejemplo n.º 4
0
from pdftools.pdffile import PDFDocument
from pdftools.pdftext import Text


def contents_to_text(contents):
    """Yield the ``text`` of every ``Text`` item found in ``contents``.

    ``contents`` is walked depth-first: nested lists are descended into,
    ``Text`` instances contribute their ``text`` attribute, and any other
    item is ignored.
    """
    for entry in contents:
        if isinstance(entry, list):
            # Recurse into the sub-list and pass its pieces straight through.
            for fragment in contents_to_text(entry):
                yield fragment
            continue
        if isinstance(entry, Text):
            yield entry.text


doc = PDFDocument("declaration.pdf")
n_pages = doc.count_pages()
text = []

for n_page in range(1, (n_pages + 1)):
    print "Page", n_page
    page = doc.read_page(n_page)
    contents = page.read_contents().contents
    text.extend(contents_to_text(contents))

print "".join(text)

f = open("ok.txt", "w")
f.write("".join(text))
f.close()
Ejemplo n.º 5
0
sys.path.append("..")

from sheetmusic import *
from gamera.core import *

# Initialise Gamera (init_gamera presumably comes from the gamera.core star
# import above -- confirm).
init_gamera()
# Ghostscript console executable. Written with forward slashes and converted
# to backslashes; the embedded double quotes stay in the value, presumably so
# the space in "Program Files" survives when the path is joined into a
# command string.
gspath = "\"c:/Program Files/gs/gs8.70/bin/gswin32c.exe\"".replace("/", "\\")
# Log line layout: timestamp, level, logger name and function, then message.
FORMAT = "%(asctime)-15s %(levelname)s [%(name)s.%(funcName)s]  %(message)s"
# Send DEBUG-and-above records to convert.log using that layout.
logging.basicConfig(level=logging.DEBUG, format=FORMAT, filename="convert.log")

for file in sys.argv[1:]:
    print "Converting %s" % file
    start = time.time()
    try:
        doc = PDFDocument(file)
        pages = doc.count_pages()
        chosen_pages = random.sample([i for i in xrange(1, pages + 1)],
                                     min(pages, 10))
        chosen_pages.sort()
        logging.info("%s - %d pages. %s chosen", file, pages, chosen_pages)
        dirname = file[:-4]
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        for page in chosen_pages:
            sys.stdout.flush()
            outfile = "%s/%s-page%d.tif" % (dirname, file[:-4], page)
            outfile_staves = "%s/%s-page%d-staves.tif" % (dirname, file[:-4],
                                                          page)
            outfile_gamscript = "%s-page%d-class.py" % (file[:-4], page)
            cmd = ' '.join([