def main():
    """Run the solr-updater loop; this function never returns.

    Parses the command-line arguments into the module-level ``args``,
    loads the config file named by ``args.config``, positions an
    ``InfobaseLog`` reader at the offset stored in ``args.state_file`` and
    then loops forever: read log records, hand the parsed keys to
    ``update_keys``, write the reader's new offset to the state file and,
    when the module-level ``COMMIT`` flag is truthy, commit to Solr.
    """
    global args
    FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)
    logger.info("BEGIN new-solr-updater")

    args = parse_arguments()
    process_args(args)

    # set OL URL when running on a dev-instance
    if args.ol_url:
        host = web.lstrips(args.ol_url, "http://").strip("/")
        update_work.set_query_host(host)

    # Function-call form of print: valid on Python 3 and prints the same
    # text on Python 2 (the old ``print str(args)`` statement is a
    # SyntaxError on Python 3).
    print(str(args))
    logger.info("loading config")
    config = load_config(args.config)

    state_file = args.state_file
    offset = read_state_file(state_file)

    logfile = InfobaseLog(config['infobase_server'])
    logfile.seek(offset)

    solr = Solr()

    while True:
        records = logfile.read_records()
        keys = parse_log(records)
        count = update_keys(keys)

        # Persist the reader position after every batch so a restart can
        # resume from it via read_state_file().
        offset = logfile.tell()
        logger.info("saving offset %s", offset)
        with open(state_file, "w") as f:
            f.write(offset)

        if COMMIT:
            logger.info("solr commit")
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")

        # don't sleep after committing some records.
        # While the commit was on, some more edits might have happened.
        if count == 0:
            logger.info("No more log records available, sleeping...")
            time.sleep(5)
def main():
    """Entry point of the solr updater; loops forever and never returns.

    Stores the parsed command-line arguments in the module-level ``args``,
    loads the config named by ``args.config``, seeks an ``InfobaseLog``
    reader to the offset read from ``args.state_file`` and then repeatedly
    reads log records, passes the parsed keys to ``update_keys``, rewrites
    the state file whenever the reader's offset has moved, and commits to
    Solr when the module-level ``COMMIT`` flag is truthy.
    """
    global args

    log_format = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
    logger.info("BEGIN new-solr-updater")

    args = parse_arguments()
    process_args(args)

    # set OL URL when running on a dev-instance
    if args.ol_url:
        without_scheme = web.lstrips(args.ol_url, "http://")
        update_work.set_query_host(without_scheme.strip("/"))

    logger.info(str(args))
    logger.info("loading config from %s", args.config)
    load_config(args.config)

    state_path = args.state_file
    offset = read_state_file(state_path)

    infobase_server = config.get('infobase_server')
    logfile = InfobaseLog(infobase_server)
    logfile.seek(offset)

    solr = Solr()

    while True:
        log_records = logfile.read_records()
        changed_keys = parse_log(log_records)
        count = update_keys(changed_keys)

        # Only touch the state file when the reader actually advanced.
        if logfile.tell() != offset:
            offset = logfile.tell()
            logger.info("saving offset %s", offset)
            with open(state_path, "w") as state_fh:
                state_fh.write(offset)

        if not COMMIT:
            logger.info("not doing solr commit as commit is off")
        else:
            solr.commit(ndocs=count)

        # Don't pause after a batch that updated something: more edits may
        # have arrived while the commit was running, so poll again at once.
        if count != 0:
            continue

        logger.debug("No more log records available, sleeping...")
        time.sleep(5)
async def main(
    ol_config: str,
    debugger=False,
    state_file='solr-update.state',
    exclude_edits_containing: str = None,
    ol_url='http://openlibrary.org/',
    solr_url: str = None,
    solr_next=False,
    socket_timeout=10,
    load_ia_scans=False,
    commit=True,
    initial_state: str = None,
):
    """
    Run the solr-updater loop; this coroutine never returns.

    :param ol_config: Path of the config file handed to ``load_config``
    :param debugger: Wait for a debugger to attach before beginning
    :param state_file: File the log offset is read from and saved to
    :param exclude_edits_containing: Don't index matching edits
    :param ol_url: If set, its host is used as the query host
    :param solr_url: If wanting to override what's in the config file
    :param solr_next: Whether to assume new schema/etc are used
    :param socket_timeout: Default socket timeout, in seconds
    :param load_ia_scans: Forwarded to ``parse_log``
    :param commit: Whether to commit to Solr after each batch
    :param initial_state: State to use if state file doesn't exist. Defaults to today.
    """
    # Local import (like debugpy below) so the non-blocking sleep at the
    # bottom of the loop does not rely on a module-level import.
    import asyncio

    FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)
    logger.info("BEGIN new-solr-updater")

    if debugger:
        import debugpy

        logger.info("Enabling debugger attachment (attach if it hangs here)")
        debugpy.listen(address=('0.0.0.0', 3000))
        logger.info("Waiting for debugger to attach...")
        debugpy.wait_for_client()
        logger.info("Debugger attached to port 3000")

    # Sometimes archive.org requests block forever.
    # Setting a timeout will make the request fail instead of waiting forever.
    socket.setdefaulttimeout(socket_timeout)

    # set OL URL when running on a dev-instance
    if ol_url:
        host = web.lstrips(ol_url, "http://").strip("/")
        update_work.set_query_host(host)

    if solr_url:
        update_work.set_solr_base_url(solr_url)

    update_work.set_solr_next(solr_next)

    logger.info("loading config from %s", ol_config)
    load_config(ol_config)

    offset = read_state_file(state_file, initial_state)

    logfile = InfobaseLog(
        config.get('infobase_server'), exclude=exclude_edits_containing
    )
    logfile.seek(offset)

    solr = Solr()

    while True:
        records = logfile.read_records()
        keys = parse_log(records, load_ia_scans)
        count = await update_keys(keys)

        # Only rewrite the state file when the reader actually advanced.
        if logfile.tell() != offset:
            offset = logfile.tell()
            logger.info("saving offset %s", offset)
            with open(state_file, "w") as f:
                f.write(offset)

        if commit:
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")

        # don't sleep after committing some records.
        # While the commit was on, some more edits might have happened.
        if count == 0:
            logger.debug("No more log records available, sleeping...")
            # time.sleep() would block the whole event loop for 5 seconds;
            # asyncio.sleep() yields so other tasks can run meanwhile.
            await asyncio.sleep(5)