Beispiel #1
0
def main():
    """Build a slob file from the TEI input named on the command line.

    Options come from ``parse_args()``. Every item produced by iterating
    ``TEI(input_file)`` is written to a new slob: ``Tag`` items become slob
    tags, everything else is added as content. When the finished file
    carries an ``edition`` tag, the output file is renamed to include it.
    """
    logging.basicConfig()

    timer = slob.SimpleTimingObserver()
    args = parse_args()

    # Default output name: the input's base name (as reduced by
    # basename_notext) joined with the "slob" extension.
    outname = args.output_file
    if outname is None:
        outname = os.path.extsep.join(
            (basename_notext(args.input_file), 'slob'))

    def emit(text):
        # Flush after every write so progress marks show up immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    writer_options = dict(compression=args.compression,
                          workdir=args.work_dir,
                          min_bin_size=args.bin_size * 1024,
                          observer=timer)

    with slob.create(outname, **writer_options) as slb:
        timer.begin('all')
        timer.begin('content')

        # Initial tags, set in this order before any content is read.
        # NOTE(review): the empty values look like placeholders that Tag
        # items from the input may overwrite later -- confirm.
        initial_tags = (
            ('label', ''),
            ('license.name', ''),
            ('license.url', ''),
            ('source', os.path.basename(args.input_file)),
            ('uri', ''),
            ('copyright', ''),
            ('created.by', args.created_by),
        )
        for tag_name, tag_value in initial_tags:
            slb.tag(tag_name, tag_value)

        source_path = os.path.expanduser(args.input_file)
        entries = TEI(source_path)

        # Bundle the js/css directories that sit next to this module.
        module_dir = os.path.dirname(__file__)
        slob.add_dir(slb, module_dir, include_only={'js', 'css'}, prefix='~/')

        print('Adding content...')
        for count, entry in enumerate(entries):
            # Progress: a dot every 100 items, a running total every 5000
            # (nothing is printed for the very first item).
            if count:
                if count % 100 == 0:
                    emit('.')
                if count % 5000 == 0:
                    emit(' {}\n'.format(count))
            if isinstance(entry, Tag):
                slb.tag(entry.name, entry.value)
                continue
            slb.add(entry.text, *entry.keys, content_type=entry.type)

    # Re-open the finished file to see whether the input supplied an edition.
    with slob.open(outname) as written:
        edition = written.tags.get('edition')

    if edition:
        stem, suffix = os.path.splitext(outname)
        renamed = '{noext}-{edition}{ext}'.format(noext=stem,
                                                  edition=edition,
                                                  ext=suffix)
        os.rename(outname, renamed)

    print('\nAll done in %s\n' % timer.end('all'))
Beispiel #2
0
def main():
    """Convert the TEI file given on the command line into a slob file.

    The output path is ``args.output_file`` or, when that is ``None``, the
    input's base name with a ``slob`` extension. After writing, the file is
    renamed to ``<name>-<edition><ext>`` if it ended up with an ``edition``
    tag.
    """
    logging.basicConfig()

    stopwatch = slob.SimpleTimingObserver()
    args = parse_args()

    target = args.output_file
    if target is None:
        stem = basename_notext(args.input_file)
        target = os.path.extsep.join((stem, 'slob'))

    def show(chunk):
        # Write straight to stdout and flush so progress is visible at once.
        sys.stdout.write(chunk)
        sys.stdout.flush()

    with slob.create(
            target,
            compression=args.compression,
            workdir=args.work_dir,
            min_bin_size=args.bin_size*1024,
            observer=stopwatch) as slb:
        stopwatch.begin('all')
        stopwatch.begin('content')

        # Set the initial tags; four of them start out empty.
        for name, value in [('label', ''),
                            ('license.name', ''),
                            ('license.url', ''),
                            ('source', os.path.basename(args.input_file)),
                            ('uri', ''),
                            ('copyright', ''),
                            ('created.by', args.created_by)]:
            slb.tag(name, value)

        tei_path = os.path.expanduser(args.input_file)
        tei_items = TEI(tei_path)

        # Add the js and css directories located beside this module.
        slob.add_dir(slb,
                     os.path.dirname(__file__),
                     include_only={'js', 'css'},
                     prefix='~/')

        print('Adding content...')
        for index, thing in enumerate(tei_items):
            # index 0 never prints; after that: '.' per 100, total per 5000.
            if index and index % 100 == 0:
                show('.')
            if index and index % 5000 == 0:
                show(' {}\n'.format(index))

            if not isinstance(thing, Tag):
                slb.add(thing.text, *thing.keys, content_type=thing.type)
            else:
                slb.tag(thing.name, thing.value)

    # Look up the edition tag in the file that was just written.
    with slob.open(target) as finished:
        edition = finished.tags.get('edition')

    if edition:
        base, extension = os.path.splitext(target)
        os.rename(target,
                  '{noext}-{edition}{ext}'.format(noext=base,
                                                  edition=edition,
                                                  ext=extension))

    print('\nAll done in %s\n' % stopwatch.end('all'))
Beispiel #3
0
def main():
    """Convert the XDXF input given on the command line into a slob file.

    When no output file is given, the name is the input's base name with
    every extension stripped, plus a ``slob`` extension. Items produced by
    iterating the ``XDXF`` reader are written as slob tags (``Tag`` items)
    or as content (everything else).
    """
    logging.basicConfig()

    timer = slob.SimpleTimingObserver()
    args = parse_args()

    target = args.output_file
    source_name = os.path.basename(args.input_file)

    if target is None:
        # Peel extensions off one at a time until none is left
        # (e.g. "dict.xdxf.gz" -> "dict").
        stem, ext = os.path.splitext(source_name)
        while ext:
            stem, ext = os.path.splitext(stem)
        target = os.path.extsep.join((stem, 'slob'))

    def emit(text):
        # Flush after each write so progress marks appear immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    with slob.create(target,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size*1024,
                     observer=timer) as slb:
        timer.begin('all')
        timer.begin('content')

        # Initial tags, written in this order before the input is read.
        starting_tags = (
            ('label', ''),
            ('license.name', ''),
            ('license.url', ''),
            ('source', source_name),
            ('uri', ''),
            ('copyright', ''),
            ('created.by', args.created_by),
        )
        for tag_name, tag_value in starting_tags:
            slb.tag(tag_name, tag_value)

        reader = XDXF(make_input(args.input_file),
                      skip_article_title=args.skip_article_title,
                      remove_newline=args.remove_newline)

        # Bundle the js/css directories found next to this module.
        module_dir = os.path.dirname(__file__)
        slob.add_dir(slb, module_dir,
                     include_only={'js', 'css'},
                     prefix='~/')

        print('Adding content...')
        for count, entry in enumerate(reader):
            # Nothing for the first item; then '.' per 100, total per 5000.
            if count:
                if count % 100 == 0:
                    emit('.')
                if count % 5000 == 0:
                    emit(' {}\n'.format(count))
            if isinstance(entry, Tag):
                slb.tag(entry.name, entry.value)
                continue
            slb.add(entry.text, *entry.keys, content_type=entry.type)

    print('\nAll done in %s\n' % timer.end('all'))
Beispiel #4
0
def main():
    """Build a slob file from the article source at ``args.couch_url``.

    Options come from ``parse_args()``. Articles are written by
    ``CouchArticleSource(args, slb).run()``; afterwards the built-in
    resource directories next to this module and any extra
    ``args.content_dirs`` are added. Phase timings are printed to stdout as
    the slob writer reports events to the local ``observer`` callback.
    """
    logging.basicConfig()

    def p(text):
        # Flush after every write so progress messages appear immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    # Start timestamps of the phases currently being timed, keyed by name.
    times = {}

    def begin(name):
        times[name] = time.time()

    def end(name):
        # Pops the entry, so each phase can be ended only once per begin().
        t0 = times.pop(name)
        return timedelta(seconds=int(time.time() - t0))

    def observer(e):
        # Event names are mutually exclusive, so this is a single elif
        # chain (the original had a stray ``if`` on 'end_finalize').
        if e.name == 'begin_finalize':
            p('\nFinished adding content in %s' % end('content'))
            p('\nFinalizing...')
            begin('finalize')
        elif e.name == 'end_finalize':
            p('\nFinalized in %s' % end('finalize'))
        elif e.name == 'begin_resolve_aliases':
            p('\nResolving aliases...')
            begin('aliases')
        elif e.name == 'end_resolve_aliases':
            p('\nResolved aliases in %s' % end('aliases'))
        elif e.name == 'begin_sort':
            p('\nSorting...')
            begin('sort')
        elif e.name == 'end_sort':
            p(' sorted in %s' % end('sort'))

    args = parse_args()

    # Default output name: <couch_url basename without ext>.<compression>.slob
    outname = args.output_file
    if outname is None:
        basename = os.path.basename(args.couch_url)
        noext, _ext = os.path.splitext(basename)
        outname = os.path.extsep.join((noext, args.compression, 'slob'))

    def set_tag_from_args(slb, name):
        # Tag 'a.b' is read from args.a_b; empty/None values are skipped so
        # they do not overwrite a tag that is already set.
        value = getattr(args, name.replace('.', '_'))
        if value:
            slb.tag(name, value)

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size * 1024,
                     observer=observer) as slb:
        begin('content')
        # Create the tags with empty values first.
        slb.tag('license.name', '')
        slb.tag('license.url', '')
        slb.tag('created.by', '')
        slb.tag('copyright', '')
        article_source = CouchArticleSource(args, slb)
        # NOTE(review): the 'all' timer starts only after the article source
        # has been constructed, so the reported total excludes that setup --
        # confirm this is intended.
        begin('all')
        # Command-line args override the article source.
        set_tag_from_args(slb, 'license.name')
        set_tag_from_args(slb, 'license.url')
        set_tag_from_args(slb, 'created.by')

        article_source.run()

        include_built_in = {'js', 'css', 'images'}

        if not args.no_math:
            include_built_in.add('MathJax')

        content_dir = os.path.dirname(__file__)
        slob.add_dir(slb,
                     content_dir,
                     include_only=include_built_in,
                     prefix='~/')
        if args.content_dirs:
            # Distinct loop name: do not rebind ``content_dir`` above.
            for extra_dir in args.content_dirs:
                slob.add_dir(slb, extra_dir)

    p('\nAll done in %s\n' % end('all'))
Beispiel #5
0
def main():
    """Create a slob file from the article source named by ``args.couch_url``.

    Parses command-line options with ``parse_args()``, lets
    ``CouchArticleSource(args, slb)`` write the articles, then adds the
    built-in resource directories beside this module plus any directories
    listed in ``args.content_dirs``. Timing messages for each phase are
    printed to stdout via the ``observer`` callback handed to the writer.
    """
    logging.basicConfig()

    def p(text):
        # Write and flush at once so status lines are not held in a buffer.
        sys.stdout.write(text)
        sys.stdout.flush()

    # Maps a phase name to the time.time() value recorded when it began.
    times = {}

    def begin(name):
        times[name] = time.time()

    def end(name):
        # Removes the start time; returns the elapsed whole seconds.
        t0 = times.pop(name)
        return timedelta(seconds=int(time.time() - t0))

    def observer(e):
        # One elif chain: an event has exactly one name, so at most one
        # branch applies (previously 'end_finalize' began a second chain).
        if e.name == 'begin_finalize':
            p('\nFinished adding content in %s' % end('content'))
            p('\nFinalizing...')
            begin('finalize')
        elif e.name == 'end_finalize':
            p('\nFinalized in %s' % end('finalize'))
        elif e.name == 'begin_resolve_aliases':
            p('\nResolving aliases...')
            begin('aliases')
        elif e.name == 'end_resolve_aliases':
            p('\nResolved aliases in %s' % end('aliases'))
        elif e.name == 'begin_sort':
            p('\nSorting...')
            begin('sort')
        elif e.name == 'end_sort':
            p(' sorted in %s' % end('sort'))

    args = parse_args()

    # Without an explicit output file, derive one from the last component
    # of couch_url: <name without ext>.<compression>.slob
    outname = args.output_file
    if outname is None:
        basename = os.path.basename(args.couch_url)
        noext, _ext = os.path.splitext(basename)
        outname = os.path.extsep.join((noext, args.compression, 'slob'))

    def set_tag_from_args(slb, name):
        # Looks up the tag under its argparse-style attribute name
        # ('license.name' -> args.license_name) and sets it only if truthy.
        value = getattr(args, name.replace('.', '_'))
        if value:
            slb.tag(name, value)

    with slob.create(outname,
                     compression=args.compression,
                     workdir=args.work_dir,
                     min_bin_size=args.bin_size*1024,
                     observer=observer) as slb:
        begin('content')
        # Create the tags up front with empty values.
        slb.tag('license.name', '')
        slb.tag('license.url', '')
        slb.tag('created.by', '')
        slb.tag('copyright', '')
        article_source = CouchArticleSource(args, slb)
        # NOTE(review): 'all' is started after the article source is
        # constructed, so the final total does not include that step --
        # confirm whether that is deliberate.
        begin('all')
        # Command-line args override the article source.
        set_tag_from_args(slb, 'license.name')
        set_tag_from_args(slb, 'license.url')
        set_tag_from_args(slb, 'created.by')

        article_source.run()

        include_built_in = {'js', 'css', 'images'}

        if not args.no_math:
            include_built_in.add('MathJax')

        content_dir = os.path.dirname(__file__)
        slob.add_dir(slb, content_dir,
                     include_only=include_built_in,
                     prefix='~/')
        if args.content_dirs:
            # Separate loop variable so ``content_dir`` keeps its meaning.
            for extra_dir in args.content_dirs:
                slob.add_dir(slb, extra_dir)

    p('\nAll done in %s\n' % end('all'))