def main(argv):
    """Command-line entry point: run pdf2txt over every file named in argv.

    Options, parsed with getopt from ``argv[1:]``:
      -d           raise the debug level by one (may be repeated)
      -p pages     add one integer to the page set handed to pdf2txt
                   (may be repeated)
      -P password  password handed to pdf2txt
      -c codec     codec name handed to pdf2txt (default 'ascii')
      -o output    write to this file instead of stdout; if given more
                   than once, only the last one is opened

    Returns 100 after printing the usage line when the options cannot be
    parsed, when no input file is given, or when a -p value is not an
    integer.  Otherwise returns None once all files have been processed.
    """
    import getopt

    def usage():
        # Parenthesised form prints the same text under the print statement.
        print('usage: %s [-d] [-p pages] [-P password] [-c codec] [-o output] file ...' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dp:P:c:o:')
    except getopt.GetoptError:
        return usage()
    if not args:
        return usage()

    debug = 0
    cmapdir = 'CMap'
    cdbcmapdir = 'CDBCMap'
    codec = 'ascii'
    pages = set()
    password = ''
    outpath = None
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-p':
            try:
                pages.add(int(v))
            except ValueError:
                # A malformed page number is a usage error, not a crash.
                return usage()
        elif k == '-P':
            password = v
        elif k == '-c':
            codec = v
        elif k == '-o':
            # Only remember the path here; opening is deferred so that a
            # later option error or a repeated -o does not leave stray
            # open (and truncated) files behind.
            outpath = v

    if outpath is None:
        outfp = stdout
    else:
        outfp = open(outpath, 'wb')
    try:
        #
        CMapDB.initialize(cmapdir, cdbcmapdir, debug=debug)
        rsrc = PDFResourceManager(debug=debug)
        for fname in args:
            pdf2txt(outfp, rsrc, fname, pages, codec, password=password, debug=debug)
    finally:
        # Close only a file opened here; stdout belongs to the caller.
        if outpath is not None:
            outfp.close()
    return
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
    """Write a '.cmap.cdb' file into cdbcmapdir for each entry of files.

    CMapDB is initialized with cmapdir first.  Entries whose name ends in
    '.upr' are ignored.  For every other entry the base name is used as the
    CMap name; the CMap is fetched with CMapDB.get_cmap and handed to
    dumpcdb together with the target path.  An existing target is left
    alone (and reported on stderr) unless force is true.
    """
    from cmap import CMapDB
    CMapDB.initialize(cmapdir)
    for path in files:
        if not path.endswith('.upr'):
            name = os.path.basename(path)
            target = os.path.join(cdbcmapdir, name+'.cmap.cdb')
            # Regenerate when forced or when no target exists yet.
            if force or not os.path.exists(target):
                print >>stderr, 'Reading: %r...' % path
                dumpcdb(CMapDB.get_cmap(name), target)
            else:
                print >>stderr, 'Skipping: %r' % target
    return
def __init__(self, spec): try: self.basefont = literal_name(spec["BaseFont"]) except KeyError: if STRICT: raise PDFFontError("BaseFont is missing") self.basefont = "unknown" self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {})) self.cidcoding = "%s-%s" % ( self.cidsysteminfo.get("Registry", "unknown"), self.cidsysteminfo.get("Ordering", "unknown"), ) try: name = literal_name(spec["Encoding"]) except KeyError: if STRICT: raise PDFFontError("Encoding not specified") name = "unknown" try: self.cmap = CMapDB.get_cmap(name, strict=STRICT) except CMapDB.CMapNotFound, e: raise PDFFontError(e)
def get_cmap(self, name):
    """Return CMapDB's CMap for name.

    The module-level STRICT flag is forwarded as the strict keyword.
    """
    cmap = CMapDB.get_cmap(name, strict=STRICT)
    return cmap
def get_cmap(self, cmapname, strict=False):
    """Return CMapDB's CMap for cmapname.

    The caller's strict flag (default False) is forwarded unchanged.
    """
    cmap = CMapDB.get_cmap(cmapname, strict=strict)
    return cmap