def __init__(self, source):
    """Set up the font name and character remapping from a PDF font object.

    Args:
        source: Font dictionary object.  ``source.BaseFont`` is read with
            its first character dropped (the leading ``/`` of a PDF name)
            and ``source.ToUnicode``, when truthy, must expose the CMap
            text as ``.stream``.

    Attributes set:
        name: ``BaseFont`` translated through ``self.lookup`` when an
            entry exists, otherwise the bare ``BaseFont`` name.
        remap: Callable taking an integer character code.  Defaults to
            ``chr``; when the CMap contains ``bfchar`` mappings it is the
            ``get`` method of the code -> character dict, so unmapped
            codes yield ``None``.
        twobyte: ``True`` when the first mapping token is longer than
            four characters (i.e. wider than ``<xx>``), else ``False``.

    Raises:
        AssertionError: If a ``bfchar`` section holds an odd number of
            tokens, or a token is not of the form ``<xx>`` / ``<xxxx>``.
    """
    name = source.BaseFont[1:]
    self.name = self.lookup.get(name, name)
    self.remap = chr
    self.twobyte = False

    to_unicode = source.ToUnicode
    if not to_unicode:
        return

    mapping = {}
    first_token = None
    # A CMap may hold zero, one or several 'beginbfchar ... endbfchar'
    # sections.  Element 0 of the split is the text before the first
    # keyword, so it is skipped; every later element starts a section.
    for section in to_unicode.stream.split('beginbfchar')[1:]:
        tokens = list(PdfTokens(section.split('endbfchar')[0]))
        # Tokens come in (source code, destination code) pairs.
        assert not len(tokens) & 1
        codes = []
        for token in tokens:
            assert (token[0] == '<' and token[-1] == '>'
                    and len(token) in (4, 6)), token
            codes.append(int(token[1:-1], 16))
        if first_token is None and tokens:
            first_token = tokens[0]
        mapping.update(
            (src, chr(dst)) for src, dst in zip(codes[::2], codes[1::2]))

    if first_token is None:
        # No bfchar mappings at all (e.g. a bfrange-only CMap): keep the
        # identity defaults instead of failing with IndexError.
        return

    self.remap = mapping.get
    self.twobyte = len(first_token) > 4
from music21 import *
from pdfrw import PdfReader, PdfWriter, PdfTokens
from pdfrw.findobjs import page_per_xobj

# Maps a content-stream string operand (parentheses included) to the music21
# clef class it stands for.
# NOTE(review): presumably these are glyph codes of a music font -- confirm.
CLEF_MAPPING = {"(&)": clef.TrebleClef,
                "(V)": clef.Treble8vbClef,
                "(?)": clef.BassClef}

# Exactly one command-line argument (the input PDF path) is required; the
# one-element unpacking raises ValueError for any other argument count.
# NOTE(review): `sys` and `os` are not imported in the visible part of the
# file -- confirm they are imported elsewhere.
inpfn, = sys.argv[1:]
outfn = 'extract.' + os.path.basename(inpfn)

# Only the first page of the document is processed.
doc = PdfReader(inpfn)
page = doc.pages[0]
# page.Contents.stream = page.Contents.stream[:21000]
tokens = PdfTokens(page.Contents.stream)

# Running q/Q balance: incremented on "q", decremented on "Q".
indent = 0
# List of content-stream operator tokens.
# NOTE(review): not referenced in the visible part of the script.
commands = ["q", "Q", "ET", "BT", "cm", "Tm", "Tf", "s", "m", "l", "S", "TJ",
            "f", "Tj", "k", "re", "W", "n", "K", "w", "c"]
params = []
# Groups collected so far; `subcommands` is the group currently being built.
items = []
subcommands = []
for token in tokens:
    if token == "q":
        indent += 1
        # Flush a non-empty pending group before starting a fresh one.
        if subcommands:
            items.append(subcommands)
        subcommands = []
    elif token == "Q":
        indent -= 1
        # The pending group is appended even when it is empty.
        items.append(subcommands)
        #print(subcommands)