def main():
    """Command-line entry point: build a slob file from a TEI input file.

    Options come from ``parse_args()``.  When no output file is given, the
    output is named after the input file (via ``basename_notext``) with a
    ``slob`` extension.  The output is created with ``slob.create``, seeded
    with a fixed set of tags, given the ``js`` and ``css`` content found
    next to this module under the ``~/`` prefix, and then filled with every
    item yielded by ``TEI``: ``Tag`` items are stored as tags, everything
    else is added as content.  Afterwards the output is reopened and, if it
    carries a non-empty ``edition`` tag, renamed to
    ``<name>-<edition><ext>``.
    """
    logging.basicConfig()
    observer = slob.SimpleTimingObserver()
    args = parse_args()

    outname = args.output_file
    if outname is None:
        # No explicit output name: derive it from the input file name.
        outname = os.path.extsep.join(
            (basename_notext(args.input_file), 'slob'))

    def emit(text):
        # Write progress output and flush so it shows up immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        observer.begin('all')
        observer.begin('content')

        # Create tags; items yielded by the TEI source may set them again
        # further down.
        initial_tags = (
            ('label', ''),
            ('license.name', ''),
            ('license.url', ''),
            ('source', os.path.basename(args.input_file)),
            ('uri', ''),
            ('copyright', ''),
            ('created.by', args.created_by),
        )
        for tag_name, tag_value in initial_tags:
            slb.tag(tag_name, tag_value)

        tei = TEI(os.path.expanduser(args.input_file))

        # Bundle the js/css content that lives next to this module.
        slob.add_dir(slb, os.path.dirname(__file__),
                     include_only={'js', 'css'}, prefix='~/')

        print('Adding content...')
        for count, entry in enumerate(tei):
            if count:
                # A dot every 100 items, a running total every 5000.
                if count % 100 == 0:
                    emit('.')
                if count % 5000 == 0:
                    emit(' {}\n'.format(count))
            if isinstance(entry, Tag):
                slb.tag(entry.name, entry.value)
            else:
                slb.add(entry.text, *entry.keys, content_type=entry.type)

    # NOTE(review): reopening assumes the file is complete once the create
    # context has exited -- confirm against slob.
    with slob.open(outname) as written:
        edition = written.tags.get('edition')

    if edition:
        stem, suffix = os.path.splitext(outname)
        os.rename(outname, '{noext}-{edition}{ext}'.format(
            noext=stem, edition=edition, ext=suffix))

    print('\nAll done in %s\n' % observer.end('all'))
def main():
    """Convert the TEI file named on the command line into a slob file.

    Steps, in order: configure logging, parse arguments, pick the output
    name (the ``output_file`` argument, or the input's base name plus a
    ``slob`` extension), create the output through ``slob.create``, write
    the initial tags, add the ``js``/``css`` content located beside this
    module under ``~/``, and store each item produced by ``TEI`` either as
    a tag (``Tag`` instances) or as content.  Finally the output is
    reopened; a non-empty ``edition`` tag causes the file to be renamed to
    ``<name>-<edition><ext>``.
    """
    logging.basicConfig()
    observer = slob.SimpleTimingObserver()
    args = parse_args()

    outname = (
        args.output_file
        if args.output_file is not None
        else os.path.extsep.join((basename_notext(args.input_file), 'slob'))
    )

    def progress(chunk):
        # Flush after every write so progress marks appear right away.
        sys.stdout.write(chunk)
        sys.stdout.flush()

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        observer.begin('all')
        observer.begin('content')

        # Create tags
        slb.tag('label', '')
        slb.tag('license.name', '')
        slb.tag('license.url', '')
        source_name = os.path.basename(args.input_file)
        slb.tag('source', source_name)
        slb.tag('uri', '')
        slb.tag('copyright', '')
        slb.tag('created.by', args.created_by)

        expanded_input = os.path.expanduser(args.input_file)
        tei = TEI(expanded_input)

        # Add the js/css content found in this module's directory.
        module_dir = os.path.dirname(__file__)
        slob.add_dir(slb, module_dir, include_only={'js', 'css'}, prefix='~/')

        print('Adding content...')
        for index, item in enumerate(tei):
            every_100 = index % 100 == 0
            every_5000 = index % 5000 == 0
            if index and every_100:
                progress('.')
            if index and every_5000:
                progress(' {}\n'.format(index))

            if not isinstance(item, Tag):
                slb.add(item.text, *item.keys, content_type=item.type)
            else:
                slb.tag(item.name, item.value)

    # NOTE(review): assumes the output can be reopened for reading as soon
    # as the create context has exited -- confirm against slob.
    with slob.open(outname) as result:
        edition = result.tags.get('edition')

    if not edition:
        print('\nAll done in %s\n' % observer.end('all'))
        return

    # Put the edition into the file name, keeping the extension.
    noext, ext = os.path.splitext(outname)
    renamed = '{noext}-{edition}{ext}'.format(
        noext=noext, edition=edition, ext=ext)
    os.rename(outname, renamed)
    print('\nAll done in %s\n' % observer.end('all'))
def main():
    """Command-line entry point: build a slob file from an XDXF input file.

    Options come from ``parse_args()``.  When no output file is given, the
    output name is the input's base name with every extension stripped,
    plus a ``slob`` extension.  The output is created with ``slob.create``,
    seeded with a fixed set of tags, given the ``js`` and ``css`` content
    found next to this module under the ``~/`` prefix, and then filled with
    every item yielded by ``XDXF``: ``Tag`` items are stored as tags,
    everything else is added as content.
    """
    logging.basicConfig()
    observer = slob.SimpleTimingObserver()
    args = parse_args()

    source_name = os.path.basename(args.input_file)
    outname = args.output_file
    if outname is None:
        # Peel extensions off one at a time until none is left.
        stem, suffix = os.path.splitext(source_name)
        while suffix:
            stem, suffix = os.path.splitext(stem)
        outname = os.path.extsep.join((stem, 'slob'))

    def emit(text):
        # Write progress output and flush so it shows up immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        observer.begin('all')
        observer.begin('content')

        # Create tags; items yielded by the XDXF source may set them again
        # further down.
        for tag_name, tag_value in (
                ('label', ''),
                ('license.name', ''),
                ('license.url', ''),
                ('source', source_name),
                ('uri', ''),
                ('copyright', ''),
                ('created.by', args.created_by)):
            slb.tag(tag_name, tag_value)

        xdxf = XDXF(make_input(args.input_file),
                    skip_article_title=args.skip_article_title,
                    remove_newline=args.remove_newline)

        # Bundle the js/css content that lives next to this module.
        slob.add_dir(slb, os.path.dirname(__file__),
                     include_only={'js', 'css'}, prefix='~/')

        print('Adding content...')
        for count, entry in enumerate(xdxf):
            if count:
                # A dot every 100 items, a running total every 5000.
                if count % 100 == 0:
                    emit('.')
                if count % 5000 == 0:
                    emit(' {}\n'.format(count))
            if isinstance(entry, Tag):
                slb.tag(entry.name, entry.value)
            else:
                slb.add(entry.text, *entry.keys, content_type=entry.type)

    print('\nAll done in %s\n' % observer.end('all'))
def main():
    """Command-line entry point: build a slob file from a CouchDB source.

    Options come from ``parse_args()``.  When no output file is given, the
    output name is built from the base name of ``couch_url`` (last
    extension removed), the compression name and a ``slob`` extension.
    The output is created with ``slob.create``; a few tags are seeded with
    empty values, ``CouchArticleSource`` is constructed, the
    ``license.name``/``license.url``/``created.by`` tags are overridden
    from command-line values when those are set, and the article source is
    run.  Finally the built-in content next to this module (``js``,
    ``css``, ``images`` and, unless ``no_math`` is set, ``MathJax``) is
    added under ``~/``, followed by any extra content directories.

    Progress and per-phase timings are written to stdout.
    """
    logging.basicConfig()

    def emit(text):
        # Write progress output and flush so it shows up immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    # Start timestamps of the phases currently being timed, keyed by name.
    started_at = {}

    def start_timer(phase):
        started_at[phase] = time.time()

    def stop_timer(phase):
        # Elapsed time since the matching start, truncated to whole seconds.
        t0 = started_at.pop(phase)
        return timedelta(seconds=int(time.time() - t0))

    def observer(e):
        # Report the events passed to this callback; names that are not
        # listed here are ignored.
        event = e.name
        if event == 'begin_finalize':
            emit('\nFinished adding content in %s' % stop_timer('content'))
            emit('\nFinalizing...')
            start_timer('finalize')
        elif event == 'end_finalize':
            emit('\nFinalized in %s' % stop_timer('finalize'))
        elif event == 'begin_resolve_aliases':
            emit('\nResolving aliases...')
            start_timer('aliases')
        elif event == 'end_resolve_aliases':
            emit('\nResolved aliases in %s' % stop_timer('aliases'))
        elif event == 'begin_sort':
            emit('\nSorting...')
            start_timer('sort')
        elif event == 'end_sort':
            emit(' sorted in %s' % stop_timer('sort'))

    args = parse_args()

    outname = args.output_file
    if outname is None:
        url_stem, _ = os.path.splitext(os.path.basename(args.couch_url))
        outname = os.path.extsep.join((url_stem, args.compression, 'slob'))

    def tag_from_args(target, tag_name):
        # The argument attribute uses '_' where the tag name has '.'.
        value = getattr(args, tag_name.replace('.', '_'))
        if value:
            target.tag(tag_name, value)

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        start_timer('content')

        # Create tags
        for tag_name in ('license.name', 'license.url',
                         'created.by', 'copyright'):
            slb.tag(tag_name, '')

        article_source = CouchArticleSource(args, slb)
        start_timer('all')

        # Command args override article source
        for tag_name in ('license.name', 'license.url', 'created.by'):
            tag_from_args(slb, tag_name)

        article_source.run()

        built_in = {'js', 'css', 'images'}
        if not args.no_math:
            built_in.add('MathJax')

        # Built-in content lives next to this module.
        slob.add_dir(slb, os.path.dirname(__file__),
                     include_only=built_in, prefix='~/')

        # Extra content directories given on the command line, if any.
        for extra_dir in args.content_dirs or ():
            slob.add_dir(slb, extra_dir)

    emit('\nAll done in %s\n' % stop_timer('all'))
def main():
    """Convert the CouchDB source named on the command line to a slob file.

    Steps, in order: configure logging, parse arguments, pick the output
    name (the ``output_file`` argument, or the base name of ``couch_url``
    without its last extension, joined with the compression name and
    ``slob``), create the output through ``slob.create``, seed four tags
    with empty values, construct ``CouchArticleSource``, override
    ``license.name``/``license.url``/``created.by`` from command-line
    values that are set, run the article source, then add the built-in
    content beside this module (``js``, ``css``, ``images``, plus
    ``MathJax`` unless ``no_math`` is set) under ``~/`` and any extra
    content directories.

    Progress messages and phase timings go to stdout.
    """
    logging.basicConfig()

    def say(message):
        # Flush after every write so messages appear right away.
        sys.stdout.write(message)
        sys.stdout.flush()

    # Start timestamp per phase name, removed again when the phase ends.
    clock = {}

    def tick(label):
        clock[label] = time.time()

    def tock(label):
        # Whole seconds elapsed since tick(label).
        began = clock.pop(label)
        elapsed = int(time.time() - began)
        return timedelta(seconds=elapsed)

    def observer(e):
        # One guard per reported event name; other names fall through
        # without output.
        if e.name == 'begin_finalize':
            say('\nFinished adding content in %s' % tock('content'))
            say('\nFinalizing...')
            tick('finalize')
            return
        if e.name == 'end_finalize':
            say('\nFinalized in %s' % tock('finalize'))
            return
        if e.name == 'begin_resolve_aliases':
            say('\nResolving aliases...')
            tick('aliases')
            return
        if e.name == 'end_resolve_aliases':
            say('\nResolved aliases in %s' % tock('aliases'))
            return
        if e.name == 'begin_sort':
            say('\nSorting...')
            tick('sort')
            return
        if e.name == 'end_sort':
            say(' sorted in %s' % tock('sort'))

    args = parse_args()

    outname = args.output_file
    if outname is None:
        url_name = os.path.basename(args.couch_url)
        name_parts = (os.path.splitext(url_name)[0], args.compression, 'slob')
        outname = os.path.extsep.join(name_parts)

    def override_tag(container, tag_name):
        # Look up the argument whose name is the tag name with '.' -> '_'.
        attr_name = tag_name.replace('.', '_')
        arg_value = getattr(args, attr_name)
        if not arg_value:
            return
        container.tag(tag_name, arg_value)

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        tick('content')

        # Create tags
        slb.tag('license.name', '')
        slb.tag('license.url', '')
        slb.tag('created.by', '')
        slb.tag('copyright', '')

        article_source = CouchArticleSource(args, slb)
        tick('all')

        # Command args override article source
        override_tag(slb, 'license.name')
        override_tag(slb, 'license.url')
        override_tag(slb, 'created.by')

        article_source.run()

        include_built_in = (
            {'js', 'css', 'images'}
            if args.no_math
            else {'js', 'css', 'images', 'MathJax'}
        )
        here = os.path.dirname(__file__)
        slob.add_dir(slb, here, include_only=include_built_in, prefix='~/')

        # Extra content directories from the command line, if any.
        if args.content_dirs:
            for extra in args.content_dirs:
                slob.add_dir(slb, extra)

    say('\nAll done in %s\n' % tock('all'))