Ejemplo n.º 1
0
def main(argv):
  """Command-line entry point: parse options, then hand each file to pdf2txt.

  argv is the complete argument vector; argv[0] is used only as the
  program name in the usage message.  Recognised options:

    -d           raise the debug level by one (may be repeated)
    -p pages     add one integer to the page set (may be repeated)
    -P password  password passed through to pdf2txt
    -c codec     codec name passed through to pdf2txt (default 'ascii')
    -o output    write to this file (opened 'wb') instead of stdout

  Returns 100 after printing the usage message when the options cannot
  be parsed, when a -p value is not an integer, or when no input file is
  given.  Returns None once every input file has been passed to pdf2txt.
  """
  import getopt
  def usage():
    # A parenthesised single-argument print emits the same text whether
    # print is a statement or a function.
    print('usage: %s [-d] [-p pages] [-P password] [-c codec] [-o output] file ...' % argv[0])
    return 100
  try:
    (opts, args) = getopt.getopt(argv[1:], 'dp:P:c:o:')
  except getopt.GetoptError:
    return usage()
  if not args: return usage()
  debug = 0
  cmapdir = 'CMap'
  cdbcmapdir = 'CDBCMap'
  codec = 'ascii'
  pages = set()
  password = ''
  outpath = None
  for (k, v) in opts:
    if k == '-d': debug += 1
    elif k == '-p':
      try:
        pages.add(int(v))
      except ValueError:
        # A malformed page number is a usage error like any other bad
        # option, not an uncaught traceback.
        return usage()
    elif k == '-P': password = v
    elif k == '-c': codec = v
    elif k == '-o': outpath = v
  # Open the output only after every option has been validated, so a bad
  # command line never creates or truncates the file and a repeated -o
  # opens just the last path given.
  if outpath is None:
    outfp = stdout
  else:
    outfp = open(outpath, 'wb')
  try:
    CMapDB.initialize(cmapdir, cdbcmapdir, debug=debug)
    rsrc = PDFResourceManager(debug=debug)
    for fname in args:
      pdf2txt(outfp, rsrc, fname, pages, codec, password=password, debug=debug)
  finally:
    # Close (and thereby flush) only a file this function opened itself;
    # stdout is left alone.
    if outpath is not None:
      outfp.close()
  return
Ejemplo n.º 2
0
def convert_cmap(files, cmapdir, cdbcmapdir, force=False):
  """Dump the CMap named by each entry of files into cdbcmapdir.

  files      -- iterable of path strings; entries ending in '.upr' are
                ignored, and only the base name of each remaining entry
                is used as the CMap name.
  cmapdir    -- directory passed to CMapDB.initialize.
  cdbcmapdir -- directory in which '<name>.cmap.cdb' files are written.
  force      -- when false, a target file that already exists is left
                alone and a 'Skipping' line is printed to stderr.

  Progress messages go to stderr.  Always returns None.
  """
  from cmap import CMapDB
  CMapDB.initialize(cmapdir)
  for path in files:
    if path.endswith('.upr'):
      continue
    name = os.path.basename(path)
    target = os.path.join(cdbcmapdir, name+'.cmap.cdb')
    # Convert when forced or when no dump exists yet; otherwise skip.
    if force or not os.path.exists(target):
      print >>stderr, 'Reading: %r...' % path
      dumpcdb(CMapDB.get_cmap(name), target)
    else:
      print >>stderr, 'Skipping: %r' % target
  return