Ejemplo n.º 1
0
def main():
    global args
    FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)

    logger.info("BEGIN new-solr-updater")

    args = parse_arguments()
    process_args(args)

    # set OL URL when running on a dev-instance
    if args.ol_url:
        host = web.lstrips(args.ol_url, "http://").strip("/")
        update_work.set_query_host(host)
    
    print str(args)
    logger.info("loading config")
    config = load_config(args.config)

    state_file = args.state_file
    offset = read_state_file(state_file)

    logfile = InfobaseLog(config['infobase_server'])
    logfile.seek(offset)

    solr = Solr()

    while True:
        records = logfile.read_records()
        keys = parse_log(records)
        count = update_keys(keys)

        offset = logfile.tell()
        logger.info("saving offset %s", offset)
        with open(state_file, "w") as f:
            f.write(offset)

        if COMMIT:
            logger.info("solr commit")
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")


        # don't sleep after committing some records. 
        # While the commit was on, some more edits might have happened.
        if count == 0:
            logger.info("No more log records available, sleeping...")
            time.sleep(5)
def main():
    global args
    FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)

    logger.info("BEGIN new-solr-updater")

    args = parse_arguments()
    process_args(args)

    # set OL URL when running on a dev-instance
    if args.ol_url:
        host = web.lstrips(args.ol_url, "http://").strip("/")
        update_work.set_query_host(host)

    logger.info(str(args))
    logger.info("loading config from %s", args.config)
    load_config(args.config)

    state_file = args.state_file
    offset = read_state_file(state_file)

    logfile = InfobaseLog(config.get('infobase_server'))
    logfile.seek(offset)

    solr = Solr()

    while True:
        records = logfile.read_records()
        keys = parse_log(records)
        count = update_keys(keys)

        if logfile.tell() != offset:
            offset = logfile.tell()
            logger.info("saving offset %s", offset)
            with open(state_file, "w") as f:
                f.write(offset)

        if COMMIT:
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")

        # don't sleep after committing some records.
        # While the commit was on, some more edits might have happened.
        if count == 0:
            logger.debug("No more log records available, sleeping...")
            time.sleep(5)
Ejemplo n.º 3
0
async def main(
    ol_config: str,
    debugger=False,
    state_file='solr-update.state',
    exclude_edits_containing: str = None,
    ol_url='http://openlibrary.org/',
    solr_url: str = None,
    solr_next=False,
    socket_timeout=10,
    load_ia_scans=False,
    commit=True,
    initial_state: str = None,
):
    """
    :param debugger: Wait for a debugger to attach before beginning
    :param exclude_edits_containing: Don't index matching edits
    :param solr_url: If wanting to override what's in the config file
    :param solr_next: Whether to assume new schema/etc are used
    :param initial_state: State to use if state file doesn't exist. Defaults to today.
    """
    FORMAT = "%(asctime)-15s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)
    logger.info("BEGIN new-solr-updater")

    if debugger:
        import debugpy

        logger.info("Enabling debugger attachment (attach if it hangs here)")
        debugpy.listen(address=('0.0.0.0', 3000))
        logger.info("Waiting for debugger to attach...")
        debugpy.wait_for_client()
        logger.info("Debugger attached to port 3000")

    # Sometimes archive.org requests blocks forever.
    # Setting a timeout will make the request fail instead of waiting forever.
    socket.setdefaulttimeout(socket_timeout)

    # set OL URL when running on a dev-instance
    if ol_url:
        host = web.lstrips(ol_url, "http://").strip("/")
        update_work.set_query_host(host)

    if solr_url:
        update_work.set_solr_base_url(solr_url)

    update_work.set_solr_next(solr_next)

    logger.info("loading config from %s", ol_config)
    load_config(ol_config)

    offset = read_state_file(state_file, initial_state)

    logfile = InfobaseLog(config.get('infobase_server'),
                          exclude=exclude_edits_containing)
    logfile.seek(offset)

    solr = Solr()

    while True:
        records = logfile.read_records()
        keys = parse_log(records, load_ia_scans)
        count = await update_keys(keys)

        if logfile.tell() != offset:
            offset = logfile.tell()
            logger.info("saving offset %s", offset)
            with open(state_file, "w") as f:
                f.write(offset)

        if commit:
            solr.commit(ndocs=count)
        else:
            logger.info("not doing solr commit as commit is off")

        # don't sleep after committing some records.
        # While the commit was on, some more edits might have happened.
        if count == 0:
            logger.debug("No more log records available, sleeping...")
            time.sleep(5)