def run(config, engine_name, page_xml_stream):
    """Print one JSON document per two-revision page found in a page XML stream.

    An engine is built from ``config`` under the name ``engine_name`` and
    the pages are read from ``page_xml_stream``.  For every page that
    yields exactly two revisions, the text of the first is diffed against
    the text of the second and the result is printed to stdout as a
    JSON-encoded ``Revision``.  Pages with any other number of revisions
    are skipped silently.

    Args:
        config: Configuration object handed to ``Engine.from_config``.
        engine_name: Name of the engine to look up in ``config``.
        page_xml_stream: Source passed to
            ``xml_dump.Iterator.from_page_xml``.
    """
    engine = Engine.from_config(config, engine_name)
    pages = xml_dump.Iterator.from_page_xml(page_xml_stream)

    for page in pages:
        revisions = list(page)

        # Only pages made of exactly one (previous, latest) pair are diffed.
        if len(revisions) != 2:
            continue

        previous, latest = revisions
        delta = engine.diff(previous.text, latest.text)

        # A revision may carry no contributor; emit an empty user then.
        author = latest.contributor
        if author is None:
            user_id, user_text = None, None
        else:
            user_id, user_text = author.id, author.user_text

        record = Revision(
            latest.id,
            latest.timestamp,
            latest.sha1,
            page.id,
            User(user_id, user_text),
            delta,
        )
        print(json.dumps(record.to_json()))
def run(config, engine_name, store_name, paths, logger_name, threads):
    """Configure and start an ``XMLDump`` primer over ``paths``.

    Logging is loaded from ``config`` first, then the engine and store are
    built from the same configuration.  If constructing the primer raises
    ``errors.ChangeWarning``, the warning is printed and the user is asked
    whether to continue; on confirmation the primer is rebuilt with
    ``force_config=True``, otherwise the process exits with status 1.

    Args:
        config: Configuration object shared by logging, engine and store.
        engine_name: Name of the engine to look up in ``config``.
        store_name: Name of the store to look up in ``config``.
        paths: Collection of dump paths; only its length is logged here.
        logger_name: Name passed to ``log.load_config``.
        threads: Thread count, converted with ``int`` unless it is
            ``None``, in which case ``None`` is forwarded unchanged.
    """
    if threads is not None:
        threads = int(threads)

    log.load_config(config, logger_name)

    logger.info("Configuring primer...")

    engine = Engine.from_config(config, engine_name)
    store = Store.from_config(config, store_name)

    try:
        primer = XMLDump(engine, store, paths, threads=threads)
    except errors.ChangeWarning as warning:
        # NOTE(review): the warning goes to stdout while the prompt below
        # is written to stderr -- confirm this split is intentional.
        print(str(warning))
        proceed = confirm(
            "Would you like to continue anyway?",
            default="no",
            stream=sys.stderr,
        )
        if not proceed:
            sys.exit(1)
        primer = XMLDump(
            engine, store, paths, force_config=True, threads=threads
        )

    summary = "".join((
        "Starting primer:\n",
        " - engine: {0}\n".format(engine),
        " - paths: {0}".format(len(paths)),
    ))
    logger.info(summary)

    primer.start()