Example #1
0
# TODO: RPC over HTTPS
# TODO: a static-url function to set long cache TTL on media URLs
# TODO: Nginx cache media
# TODO: sitemap.xml

if __name__ == "__main__":
    opts = options.options()

    # TODO: Mongo connection options
    db = motor.MotorClient().open_sync().motorblog
    cache.startup(db)

    if opts.rebuild_indexes or opts.ensure_indexes:
        indexes.ensure_indexes(
            db.connection.sync_client().motorblog,
            opts.rebuild_indexes)

    base_url = opts.base_url

    class U(tornado.web.URLSpec):
        def __init__(self, pattern, *args, **kwargs):
            """Include base_url in pattern"""
            super(U, self).__init__(
                '/' + base_url.strip('/') + '/' + pattern.lstrip('/'),
                *args, **kwargs
            )

        def _find_groups(self):
            """Get rid of final '?' -- Tornado's reverse_url() works poorly
               with tornado.web.addslash
Example #2
0
                          " and switching to branch 'motor',\n"
                          " then put the mongo-python-driver directory"
                          " on your PYTHONPATH\n\n")

    raise

if __name__ == "__main__":
    opts = options.options()

    # TODO: Mongo connection options
    db = motor.MotorConnection().open_sync().motorblog
    cache.startup(db)

    if opts.ensure_indexes:
        logging.info('Ensuring indexes...')
        indexes.ensure_indexes(db)
        logging.info('    done.')

    base_url = opts.base_url

    class U(tornado.web.URLSpec):
        def __init__(self, pattern, *args, **kwargs):
            """Include base_url in pattern"""
            super(U, self).__init__(
                '/' + base_url.strip('/') + '/' + pattern.lstrip('/'), *args,
                **kwargs)

        def _find_groups(self):
            """Get rid of final '?' -- Tornado's reverse_url() works poorly
               with tornado.web.addslash
            """
def main(args):
    start = time.time()

    opts = options.options()
    destination_url = '/' + opts.base_url.lstrip('/')
    parts = urlparse(args.source_url)
    source_base_url = urljoin(
        '%s://%s' % (parts[0], parts[1]), parts[2].split('/xmlrpc.php')[0])

    print 'Base URL', source_base_url

    db = pymongo.Connection(safe=True).motorblog
    motordb = motor.MotorConnection().open_sync().motorblog
    if args.wipe:
        print 'Wiping motorblog database'
        db.connection.drop_database('motorblog')
        print 'Creating capped collection "events"'
        create_events_collection(motordb)
        print 'Recreating indexes'
        ensure_indexes(db)

    source = Blog(
        args.source_url, args.source_username, args.source_password,
        use_cache=not args.refresh, verbose=args.verbose)
    print 'Getting media library'

    media_library = set([
        m['link'] for m in source.get_media_library()])

    print '    %s assets\n' % len(media_library)

    print 'Getting posts and pages'
    post_structs = source.get_recent_posts(args.nposts)
    print '    %s posts' % len(post_structs)
    page_structs = source.get_pages()
    print '    %s pages' % len(page_structs)
    print

    for structs, type in [
        (post_structs, 'post'),
        (page_structs, 'page'),
    ]:
        print '%sS' % type.upper()
        for struct in structs:
            categories = struct.pop('categories', [])
            struct['description'] = wordpress_to_markdown(
                struct, media_library, db, destination_url, source_base_url)

            post = Post.from_metaweblog(struct, type)

            print '%-34s %s' % (post.title, post.status.upper())
            for category_name in categories:
                doc = db.categories.find_one({'name': category_name})
                if doc:
                    category = Category(**doc)
                else:
                    category = Category(
                        name=category_name, slug=slugify(category_name))
                    category.id = db.categories.insert(category.to_python())
                print '    %-30s %s' % (
                    category_name, ' NEW' if not doc else ''
                )

                post.categories.append(category)

            db.posts.insert(post.to_python())

        print '\nFinished %s %ss' % (len(structs), type)


    print 'Posting "categories_changed" event'
    db.events.insert(
        {'ts': datetime.datetime.utcnow(), 'name': 'categories_changed'},
        manipulate=False) # No need to add _id

    print '\nFinished in %.2f seconds' % (time.time() - start)
def main(args):
    start = time.time()

    opts = options.options()
    destination_url = '/' + opts.base_url.lstrip('/')
    parts = urlparse(args.source_url)
    source_base_url = urljoin('%s://%s' % (parts[0], parts[1]),
                              parts[2].split('/xmlrpc.php')[0])

    print 'Base URL', source_base_url

    db = pymongo.Connection(safe=True).motorblog
    motordb = motor.MotorClient().open_sync().motorblog
    if args.wipe:
        print 'Wiping motorblog database'
        db.connection.drop_database('motorblog')
        print 'Creating capped collection "events"'
        create_events_collection(motordb)
        print 'Recreating indexes'
        ensure_indexes(db)

    source = Blog(args.source_url,
                  args.source_username,
                  args.source_password,
                  use_cache=not args.refresh,
                  verbose=args.verbose)
    print 'Getting media library'

    media_library = set([m['link'] for m in source.get_media_library()])

    print '    %s assets\n' % len(media_library)

    print 'Getting posts and pages'
    post_structs = source.get_recent_posts(args.nposts)
    print '    %s posts' % len(post_structs)
    page_structs = source.get_pages()
    print '    %s pages' % len(page_structs)
    print

    for structs, post_type in [
        (post_structs, 'post'),
        (page_structs, 'page'),
    ]:
        print '%sS' % post_type.upper()
        for struct in structs:
            categories = struct.pop('categories', [])
            struct['description'] = wordpress_to_markdown(
                struct, media_library, db, destination_url, source_base_url)

            post = Post.from_metaweblog(struct, post_type)

            print '%-34s %s' % (post.title, post.status.upper())
            for category_name in categories:
                doc = db.categories.find_one({'name': category_name})
                if doc:
                    category = Category(**doc)
                else:
                    category = Category(name=category_name,
                                        slug=slugify(category_name))
                    category.id = db.categories.insert(category.to_python())
                print '    %-30s %s' % (category_name,
                                        ' NEW' if not doc else '')

                post.categories.append(category)

            db.posts.insert(post.to_python())

        print '\nFinished %s %ss' % (len(structs), post_type)

    print 'Posting "categories_changed" event'

    db.events.insert(
        {
            'ts': datetime.datetime.utcnow(),
            'name': 'categories_changed'
        },
        manipulate=False)  # No need to add _id

    print '\nFinished in %.2f seconds' % (time.time() - start)
Example #5
0
        " then put the mongo-python-driver directory"
        " on your PYTHONPATH\n\n"
        )

    raise

if __name__ == "__main__":
    opts = options.options()

    # TODO: Mongo connection options
    db = motor.MotorConnection().open_sync().motorblog
    cache.startup(db)

    if opts.ensure_indexes:
        logging.info('Ensuring indexes...')
        indexes.ensure_indexes(db)
        logging.info('    done.')

    base_url = opts.base_url
    
    class U(tornado.web.URLSpec):
        def __init__(self, pattern, *args, **kwargs):
            """Include base_url in pattern"""
            super(U, self).__init__(
                '/' + base_url.strip('/') + '/' + pattern.lstrip('/'),
                *args, **kwargs
            )

        def _find_groups(self):
            """Get rid of final '?' -- Tornado's reverse_url() works poorly
               with tornado.web.addslash
Example #6
0
# TODO: RPC over HTTPS
# TODO: a static-url function to set long cache TTL on media URLs
# TODO: Nginx cache media
# TODO: sitemap.xml

if __name__ == "__main__":
    define_options(opts)
    opts.parse_command_line()
    for handler in logging.getLogger().handlers:
        if hasattr(handler, 'baseFilename'):
            print 'Logging to', handler.baseFilename
            break

    # TODO: Mongo connection options
    db = motor.MotorClient().open_sync().motorblog
    cache.startup(db)

    if opts.rebuild_indexes or opts.ensure_indexes:
        indexes.ensure_indexes(db.connection.sync_client().motorblog,
                               drop=opts.rebuild_indexes)

    this_dir = os.path.dirname(__file__)
    application = application.get_application(this_dir, db, opts)
    http_server = httpserver.HTTPServer(application, xheaders=True)
    http_server.listen(opts.port)
    msg = 'Listening on port %s' % opts.port
    print msg
    logging.info(msg)
    tornado.ioloop.IOLoop.instance().start()