Esempio n. 1
0
def main(argv):
  """Command-line entry point: convert each PDF named in *argv* via pdf2txt.

  Recognised options (parsed with getopt from argv[1:]):
    -d           increase the debug level by one (may be repeated)
    -p pages     add an integer page number to the set passed to pdf2txt
    -P password  password handed to pdf2txt
    -c codec     output codec name (default 'ascii')
    -o output    write to this file (opened in binary mode) instead of stdout

  Returns 100 after printing the usage line when the options cannot be
  parsed or no input file is given; otherwise returns None.
  """
  import getopt
  def usage():
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('usage: %s [-d] [-p pages] [-P password] [-c codec] [-o output] file ...' % argv[0])
    return 100
  try:
    (opts, args) = getopt.getopt(argv[1:], 'dp:P:c:o:')
  except getopt.GetoptError:
    return usage()
  if not args: return usage()
  debug = 0
  cmapdir = 'CMap'
  cdbcmapdir = 'CDBCMap'
  codec = 'ascii'
  pages = set()
  password = ''
  outfp = stdout
  # File object opened for -o, if any. Tracked separately from outfp so that
  # only a file we opened ourselves is closed (never stdout).
  opened = None
  try:
    for (k, v) in opts:
      if k == '-d': debug += 1
      elif k == '-p': pages.add(int(v))
      elif k == '-P': password = v
      elif k == '-c': codec = v
      elif k == '-o':
        # A repeated -o replaces the previous target; release the old handle.
        if opened is not None:
          opened.close()
        opened = open(v, 'wb')
        outfp = opened
    #
    CMapDB.initialize(cmapdir, cdbcmapdir, debug=debug)
    rsrc = PDFResourceManager(debug=debug)
    for fname in args:
      pdf2txt(outfp, rsrc, fname, pages, codec, password=password, debug=debug)
  finally:
    # Flush and close the output file even when a conversion raises.
    if opened is not None:
      opened.close()
  return
Esempio n. 2
0
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
  """Dump each CMap file in *files* to a '<name>.cmap.cdb' file in *cdbcmapdir*.

  CMapDB is initialised with *cmapdir* first. Files ending in '.upr' are
  ignored. An output file that already exists is skipped (with a message on
  stderr) unless *force* is true. Always returns None.
  """
  from cmap import CMapDB
  CMapDB.initialize(cmapdir)
  for path in files:
    if not path.endswith('.upr'):
      # The CMap name is the file's base name; the output name derives from it.
      name = os.path.basename(path)
      target = os.path.join(cdbcmapdir, name+'.cmap.cdb')
      if force or not os.path.exists(target):
        print >>stderr, 'Reading: %r...' % path
        loaded = CMapDB.get_cmap(name)
        dumpcdb(loaded, target)
      else:
        print >>stderr, 'Skipping: %r' % target
  return
Esempio n. 3
0
 def __init__(self, spec):
     """Initialise the font's name, CID system info and CMap from *spec*.

     Sets ``basefont``, ``cidsysteminfo``, ``cidcoding`` (formatted as
     "<Registry>-<Ordering>", each part defaulting to "unknown") and ``cmap``.

     A missing "BaseFont" or "Encoding" entry falls back to "unknown"
     unless STRICT is truthy, in which case PDFFontError is raised.

     Raises:
         PDFFontError: when STRICT is set and a required entry is missing,
             or when CMapDB.get_cmap raises CMapDB.CMapNotFound.
     """
     try:
         self.basefont = literal_name(spec["BaseFont"])
     except KeyError:
         if STRICT:
             raise PDFFontError("BaseFont is missing")
         self.basefont = "unknown"
     self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
     self.cidcoding = "%s-%s" % (
         self.cidsysteminfo.get("Registry", "unknown"),
         self.cidsysteminfo.get("Ordering", "unknown"),
     )
     try:
         name = literal_name(spec["Encoding"])
     except KeyError:
         if STRICT:
             raise PDFFontError("Encoding not specified")
         name = "unknown"
     try:
         self.cmap = CMapDB.get_cmap(name, strict=STRICT)
     # "except X as e" is valid on Python 2.6+ and Python 3; the older
     # "except X, e" spelling is a SyntaxError on Python 3.
     except CMapDB.CMapNotFound as e:
         raise PDFFontError(e)
Esempio n. 4
0
 def get_cmap(self, name):
     """Return the result of CMapDB.get_cmap for *name*, passing STRICT as ``strict``."""
     cmap = CMapDB.get_cmap(name, strict=STRICT)
     return cmap
Esempio n. 5
0
 def get_cmap(self, cmapname, strict=False):
     """Delegate to CMapDB.get_cmap, forwarding *cmapname* and *strict* unchanged."""
     cmap = CMapDB.get_cmap(cmapname, strict=strict)
     return cmap
Esempio n. 6
0
 def get_cmap(self, cmapname, strict=False):
     """Return whatever CMapDB.get_cmap yields for *cmapname* with the given *strict* flag."""
     found = CMapDB.get_cmap(cmapname, strict=strict)
     return found