def __init__(self, source):
    """Set up the font's name and its character-code decoder.

    ``source`` is a font object exposing ``BaseFont`` and ``ToUnicode``
    attributes.  After construction:

    * ``self.name`` is ``BaseFont`` with its first character dropped
      (presumably the leading ``/`` of a PDF name -- confirm against the
      caller), translated through ``self.lookup`` when an entry exists.
    * ``self.remap`` is a callable taking an integer character code.  It
      is ``chr`` when no bfchar mapping is available; otherwise it is the
      ``get`` method of a dict built from the ToUnicode bfchar entries,
      so codes missing from the table yield ``None``.
    * ``self.twobyte`` is true when the first source code in the table is
      written with more than two hex digits.

    Only ``beginbfchar``/``endbfchar`` sections are read; other CMap
    constructs (e.g. ``bfrange``) are ignored.  Every bfchar section in
    the stream is used, not just the first one.

    Raises:
        AssertionError: if the bfchar tokens do not come in pairs, or a
            token is not a ``<..>`` hex string of two or four digits.
    """
    name = source.BaseFont[1:]
    self.name = self.lookup.get(name, name)
    self.remap = chr
    self.twobyte = False
    to_unicode = source.ToUnicode
    if not to_unicode:
        return

    # A CMap may split its bfchar table over several begin/end sections,
    # so gather the tokens of all of them.  The text before the first
    # 'beginbfchar' is header material and is skipped.
    tokens = []
    for section in to_unicode.stream.split('beginbfchar')[1:]:
        tokens.extend(PdfTokens(section.split('endbfchar')[0]))
    if not tokens:
        # No bfchar entries at all: keep the identity defaults set above
        # instead of failing with an IndexError.
        return

    # Entries are (source code, unicode value) pairs of hex strings.
    assert not len(tokens) & 1
    codes = []
    for token in tokens:
        assert (token[0] == '<' and token[-1] == '>'
                and len(token) in (4, 6)), token
        codes.append(int(token[1:-1], 16))
    self.remap = {
        src: chr(dst) for src, dst in zip(codes[::2], codes[1::2])
    }.get
    # '<XX>' is 4 characters long, '<XXXX>' is 6.
    self.twobyte = len(tokens[0]) > 4
Exemple #2
0
import os
import sys

from music21 import *

from pdfrw import PdfReader, PdfWriter, PdfTokens
from pdfrw.findobjs import page_per_xobj

# Maps a parenthesised string token to the music21 clef class it stands
# for.  NOTE(review): the keys look like PDF literal-string tokens for
# glyphs of a music font -- confirm against the input documents.
CLEF_MAPPING = {
    "(&)": clef.TrebleClef,
    "(V)": clef.Treble8vbClef,
    "(?)": clef.BassClef,
}

# Exactly one command-line argument is expected: the path of the input PDF.
# Exit with a usage message rather than an opaque unpacking error.
if len(sys.argv) != 2:
    sys.exit("usage: " + os.path.basename(sys.argv[0]) + " INPUT.pdf")
inpfn = sys.argv[1]
outfn = 'extract.' + os.path.basename(inpfn)
doc = PdfReader(inpfn)
page = doc.pages[0]  # only the first page is examined
# page.Contents.stream = page.Contents.stream[:21000]
tokens = PdfTokens(page.Contents.stream)

# State for the token loop that follows.
indent = 0  # incremented on each "q" token, decremented on each "Q"
# Operator names of interest (presumably the ones the loop treats as
# commands -- the usage is not visible here).
commands = ["q", "Q", "ET", "BT", "cm", "Tm", "Tf", "s", "m", "l", "S", "TJ", "f", "Tj", "k", "re", "W", "n", "K", "w", "c"]
params = []
items = []  # receives each finished `subcommands` list
subcommands = []
for token in tokens:
    if token == "q":
        indent += 1
        if subcommands:
            items.append(subcommands)
        subcommands = []
    elif token == "Q":
        indent -= 1
        items.append(subcommands)
        #print(subcommands)