Ejemplo n.º 1
0
def run(config, engine_name, page_xml_stream):
    """Print one JSON document per page that holds exactly two revisions.

    An engine is built from ``config`` under the name ``engine_name`` and
    the pages are read from ``page_xml_stream``.  For every page whose
    revision list has exactly two entries, the text of the first revision
    is diffed against the text of the second, and a ``Revision`` record
    describing the second revision (id, timestamp, sha1, page id,
    contributor and the computed delta) is serialised with ``json.dumps``
    and written to standard output.  Pages with any other number of
    revisions produce no output.
    """
    differ = Engine.from_config(config, engine_name)
    pages = xml_dump.Iterator.from_page_xml(page_xml_stream)

    for page in pages:
        history = list(page)

        # Only a (previous, latest) pair is handled here.
        if len(history) != 2:
            continue

        previous, latest = history
        delta = differ.diff(previous.text, latest.text)

        # A revision may carry no contributor; emit a user with empty
        # fields in that case.
        contributor = latest.contributor
        if contributor is None:
            user_id = user_text = None
        else:
            user_id = contributor.id
            user_text = contributor.user_text

        record = Revision(
            latest.id,
            latest.timestamp,
            latest.sha1,
            page.id,
            User(user_id, user_text),
            delta
        )
        print(json.dumps(record.to_json()))
Ejemplo n.º 2
0
def run(config, engine_name, store_name, paths, logger_name, threads):
    """Configure an ``XMLDump`` primer and start it.

    ``threads`` is converted with ``int`` unless it is ``None``.  Logging
    is set up from ``config`` using ``logger_name``, then the engine and
    the store are built from ``config`` under ``engine_name`` and
    ``store_name``.

    If constructing the primer raises ``errors.ChangeWarning``, the
    warning text is printed and the user is asked (prompt on stderr,
    default "no") whether to continue.  On confirmation the primer is
    rebuilt with ``force_config=True``; otherwise the process exits with
    status 1.
    """
    if threads is not None:
        threads = int(threads)

    log.load_config(config, logger_name)

    logger.info("Configuring primer...")

    engine = Engine.from_config(config, engine_name)
    store = Store.from_config(config, store_name)

    try:
        primer = XMLDump(engine, store, paths, threads=threads)
    except errors.ChangeWarning as warning:
        print(str(warning))
        proceed = confirm("Would you like to continue anyway?",
                          default="no", stream=sys.stderr)
        if not proceed:
            sys.exit(1)
        # The user accepted the change, so build again with
        # force_config=True.
        primer = XMLDump(engine, store, paths,
                         force_config=True, threads=threads)

    summary = "".join((
        "Starting primer:\n",
        " - engine: {0}\n".format(engine),
        " - paths: {0}".format(len(paths)),
    ))
    logger.info(summary)

    primer.start()