Example #1
0
def main(argv):
    """Visualize the book found in the current directory.

    When the first element of ``argv`` is ``-h`` the usage text is shown
    and the process exits with status 0.  Otherwise the output directory
    is created if it does not exist yet, the book id is inferred through
    ``iarchive.infer_book_id()`` and the resulting ``iarchive.Book`` is
    passed to ``visualize``.

    ``outdir`` is not assigned here; it is read from the enclosing
    (module) scope.
    """
    wants_help = len(argv) > 0 and argv[0] == '-h'
    if wants_help:
        usage()
        sys.exit(0)

    target_dir = './' + outdir + '/'
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    book_id = iarchive.infer_book_id()
    visualize(iarchive.Book(book_id, '', '.'))
Example #2
0
def _capture_stdout(cmd):
    """Run *cmd* (an argument list, no shell involved) and return its stdout.

    Passing a list keeps file names containing spaces or shell
    metacharacters intact.  A command that cannot be started is reported on
    stderr and yields an empty string, much like a shell would complain and
    produce no output.
    """
    import subprocess
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        sys.stderr.write('could not run %s: %s\n' % (cmd[0], err))
        return ''
    return proc.communicate()[0]


def _toc_from_json(arg):
    """Decode the ``--toc`` JSON text into a ``pagenum -> chapter`` dict.

    An array of toc items (openlibrary format) is converted: every item
    that has a ``pagenum`` and a ``label`` and/or ``title`` contributes one
    entry.  ``None`` and empty values are returned as decoded.  Anything
    that cannot be treated as such an array yields ``None``.

    NOTE(review): a JSON object (the older "hash of pagenum -> title"
    format) also ends up as ``None``, because probing ``toc[0]`` raises
    ``KeyError``.  That is the historical behaviour and is kept here, but
    it looks at odds with the stated intent of accepting that format --
    confirm before relying on it.
    """
    toc = json.loads(arg)
    if toc is None or len(toc) == 0:
        return toc
    try:
        # Probe for "is this an array?": a dict raises KeyError here and a
        # non-subscriptable value raises TypeError.
        toc[0]
        by_page = {}
        for tocitem in toc:
            if 'pagenum' not in tocitem:
                continue
            if 'label' in tocitem and 'title' in tocitem:
                chapterstr = '%s - %s' % (tocitem['label'], tocitem['title'])
            elif 'label' in tocitem:
                chapterstr = tocitem['label']
            elif 'title' in tocitem:
                chapterstr = tocitem['title']
            else:
                chapterstr = None
            if chapterstr is not None:
                by_page[tocitem['pagenum']] = chapterstr
        return by_page
    except (TypeError, KeyError):
        return None


def main(argv):
    """Convert a scanned book into an EPUB, DAISY, test or report output.

    ``argv`` is the argument list without the program name; it is parsed
    with ``getopt``.  Recognised options:

    * ``-h`` / ``--help``: show usage and exit.
    * ``-d`` / ``--debug``: after writing the book, run the external
      validator (ZedVal for DAISY, epubcheck otherwise) and print its output.
    * ``--daisy``, ``--epub``, ``--test``, ``--report``, ``--hocr``: output
      kind.  When several are given the precedence is daisy, test, report,
      hocr, epub; EPUB is the default.  ``--hocr`` is not implemented.
    * ``-o`` / ``--outfile``: output file name.
    * ``--document``: document name handed to ``iarchive.Book``.
    * ``--toc``: JSON table of contents (see ``_toc_from_json``).

    Positional arguments are ``[book_id [book_path [outfile]]]``.  With no
    positional arguments the book id is inferred from the current
    directory; with only a book id, a directory of that name must exist.

    Exits with status -1 on usage errors and with status 0 after
    ``--test`` / ``--report``.

    Raises:
        NotImplementedError: when ``--hocr`` is the selected output kind.
    """
    import getopt
    try:
        opts, args = getopt.getopt(argv,
                                   'dho:',
                                   ['debug', 'help', 'outfile=',
                                    'document=',
                                    'daisy', 'epub', 'test', 'report', 'hocr',
                                    'toc='])
    except getopt.GetoptError:
        usage()
        sys.exit(-1)

    out_name = None
    debug_output = False
    make_daisy = False
    make_test = False
    make_report = False
    make_hocr = False
    toc = None
    doc = ''
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-d', '--debug'):
            debug_output = True
        elif opt == '--daisy':
            make_daisy = True
        elif opt == '--epub':
            # EPUB is the fallback whenever no other kind is selected, so
            # there is nothing to record for it.
            pass
        elif opt == '--test':
            make_test = True
        elif opt == '--report':
            make_report = True
        elif opt == '--hocr':
            make_hocr = True
        elif opt in ('-o', '--outfile'):
            out_name = arg
        elif opt == '--document':
            doc = arg
        elif opt == '--toc':
            # An empty --toc value leaves any earlier toc untouched.
            if len(arg) > 0:
                toc = _toc_from_json(arg)

    if len(args) == 0:
        book_id = iarchive.infer_book_id()
        if book_id is None:
            print('No args given and no book found in current directory')
            usage()
            sys.exit(-1)
        book_path = '.'
    elif len(args) == 1:
        book_id = args[0]
        if not os.path.exists(book_id):
            print('Only book_id arg given, and no corresponding book dir found')
            usage()
            sys.exit(-1)
        book_path = book_id
    elif len(args) == 2:
        book_id = args[0]
        book_path = args[1]
    elif len(args) == 3:
        if out_name is not None:
            print('outfile found as 3rd argument, but outfile is already specified via -o')
            usage()
            sys.exit(-1)
        book_id = args[0]
        book_path = args[1]
        out_name = args[2]
    else:
        # args[3:] is a list; join it so the message can actually be built.
        print('unrecognized extra arguments ' + ' '.join(args[3:]))
        usage()
        sys.exit(-1)

    if out_name is None:
        # Derive the output name from the document (if any) or the book id.
        out_root = os.path.basename(doc) if len(doc) > 0 else book_id
        if make_daisy:
            out_name = out_root + '_daisy.zip'
        elif make_test:
            out_name = out_root + '.test'
        elif make_report:
            out_name = out_root + '.report'
        elif make_hocr:
            out_name = out_root + '.html'
        else:
            out_name = out_root + '.epub'

    iabook = iarchive.Book(book_id, doc, book_path, toc=toc)
    metadata = iabook.get_metadata()
    if make_daisy:
        ebook = daisy.Book(out_name, metadata)
        iabook_to_daisy.process_book(iabook, ebook)
    elif make_test:
        print(iabook.analyze())
        sys.exit(0)
    elif make_report:
        print(iabook.report())
        sys.exit(0)
    elif make_hocr:
        # TODO: call iabook_to_hocr.process_book(iabook) once hOCR output
        # is implemented.
        raise NotImplementedError('NYI')
    else:
        ebook = epub.Book(out_name, metadata)
        iabook_to_epub.process_book(iabook, ebook)

    ebook.finish(metadata)

    if debug_output:
        if make_daisy:
            # Unpack the freshly written zip into a clean directory and
            # validate the package file inside it.
            _capture_stdout(['rm', '-rf', 'daisy_debug'])
            _capture_stdout(['unzip', '-d', 'daisy_debug', out_name])
            zedval = os.path.join(sys.path[0], 'Zedval/ZedVal.jar')
            opf_file = os.path.join('daisy_debug',
                                    iabook.get_book_id() + '_daisy.opf')
            report = _capture_stdout(['java', '-Xms128m', '-Xmx256m',
                                      '-jar', zedval, opf_file])
        else:
            epubcheck = os.path.join(sys.path[0], 'epubcheck/epubcheck-1.1.jar')
            report = _capture_stdout(['java', '-jar', epubcheck, out_name])
        print(report)
Example #3
0
def main(argv):
    """Parse visualizer options, then visualize the book in the current dir.

    ``argv`` is passed straight to ``OptionParser.parse_args``.  The parsed
    options are published as the module-level global ``opts``.

    Post-processing applied to the parsed options:

    * ``--reduce`` must lie in 0..4, otherwise the parser reports an error.
    * ``--scale`` left at 0 becomes ``2 ** reduce``.
    * ``--leaf`` sets both ``first`` and ``last`` to that leaf and cannot
      be combined with ``--first`` / ``--last``.
    * ``--source=djvu`` is rejected as unsupported.
    * ``--outdir`` defaults to ``<source>_viz``.
    * ``--show-opts`` prints the options and positional args, then exits
      with status 0.
    * ``--legend`` / ``-l`` calls ``legend()`` and exits with status 0 as
      soon as it is encountered during parsing.

    After parsing, the output directory is created if missing, the book id
    is inferred via ``iarchive.infer_book_id()`` and the resulting
    ``iarchive.Book`` is handed to ``visualize``.
    """
    import optparse
    parser = optparse.OptionParser(usage='usage: %prog [options]',
                                   version='%prog 0.1',
                                   description='A visualizer for '
                                   'coordinate-annotated OCR data.')

    def legend_callback(option, opt_str, value, parser):
        legend()
        sys.exit(0)

    parser.add_option('--legend', '-l',
                      action='callback',
                      callback=legend_callback,
                      help='Display legend information - for generated images')
    parser.add_option('--reduce',
                      action='store',
                      type='int',
                      metavar='n',
                      default=2,
                      help='For jp2 input images, reduce jp2 resolution '
                      'by 2 ^ n when reading '
                      'original image, for speed.  This also reduces the '
                      'output scale by 2 ^ n, unless otherwise specified '
                      'with --scale.')
    parser.add_option('--scale',
                      action='store',
                      type='int',
                      default=0,
                      help='Scale result images down from original scan '
                      'resolution.')
    parser.add_option('--last',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Stop generating output leaves '
                      'after the specified leaf')
    parser.add_option('--first',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Don\'t generate output leaves until the '
                      'specified leaf')
    parser.add_option('--leaf',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Only generate output for the specified leaf')
    parser.add_option('--text',
                      action='store_true',
                      default=False,
                      help='Generate output characters for OCRed '
                      'text in input files')
    parser.add_option('--outdir',
                      help='Output directory.  Default is source_type + \'_viz\'')
    parser.add_option('--source',
                      choices=['abbyy', 'pdftoxml', 'djvu'],
                      default='abbyy',
                      help='Which source to use for OCR data/coordinates.')
    parser.add_option('--show-opts',
                      action='store_true',
                      help='Display parsed options/defaults and exit')

    global opts
    opts, args = parser.parse_args(argv)

    if opts.reduce < 0 or opts.reduce > 4:
        parser.error('--reduce must be between 0 and 4')
    if opts.scale == 0:
        # 0 means "not specified": follow the jp2 reduction factor.
        opts.scale = 2 ** opts.reduce

    if opts.leaf != 0:
        if opts.first > 0 or opts.last > 0:
            parser.error('can\'t specify --last or --first with --leaf')
        opts.last = opts.first = opts.leaf

    if opts.source == 'djvu':
        parser.error('--source=djvu not supported at the moment')

    if opts.outdir is None:
        opts.outdir = opts.source + '_viz'

    if opts.show_opts:
        print('Options: ' + str(opts))
        print('Args: ' + str(args))
        sys.exit(0)

    parser.destroy()

    target_dir = './' + opts.outdir + '/'
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    book_id = iarchive.infer_book_id()
    iabook = iarchive.Book(book_id, '', '.')
    visualize(iabook)