def main():
    """Script entry point.

    Parses the command line, points the query helper at the server given
    in ``options.server``, loads the configuration file named by
    ``options.config`` and finally updates the requested keys.
    """
    opts, requested_keys = parse_options()

    # The query helper is given a bare host: drop a leading "http://" and
    # any leading/trailing slashes from the server value.
    bare_host = web.lstrips(opts.server, "http://")
    set_query_host(bare_host.strip("/"))

    # The configuration is loaded before any key is updated.
    config.load(opts.config)

    update_keys(requested_keys)
def setup_solr_updater():
    """Prepare the global state the solr-updater relies on.

    Exposes the infogami configuration namespace as
    ``openlibrary.config.runtime_config`` and sets the query host from the
    ``dev_instance_url`` config value (default ``http://127.0.0.1:8080/``).
    """
    from infogami import config

    # solr-updater reads its configuration from
    # openlibrary.config.runtime_config, so hand it the infogami settings.
    from openlibrary import config as olconfig

    olconfig.runtime_config = config.__dict__

    # The solr-updater makes an HTTP call to the website instead of using
    # the infobase API, so the host must be set before it is used.
    from openlibrary.catalog.utils.query import set_query_host

    instance_url = config.get("dev_instance_url", "http://127.0.0.1:8080/")
    # Reduce the URL to a bare host: no "http://" prefix, no outer slashes.
    set_query_host(web.lstrips(instance_url, "http://").strip("/"))
def main():
    """Script entry point.

    In order: sets the query host from ``options.server``, applies
    ``monkeypatch`` with the config path when ``options.monkeypatch`` is
    set, loads the configuration, configures INFO-level logging and calls
    ``update_keys`` with ``commit=False`` when ``options.nocommit`` is set.
    """
    opts, keys_to_update = parse_options()

    # The query helper is given a bare host: drop a leading "http://" and
    # any leading/trailing slashes from the server value.
    server_host = web.lstrips(opts.server, "http://")
    set_query_host(server_host.strip("/"))

    # Patching (when requested) happens before the configuration is loaded.
    if opts.monkeypatch:
        monkeypatch(opts.config)

    config.load(opts.config)

    log_format = "%(asctime)s [%(levelname)s] %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    should_commit = not opts.nocommit
    update_keys(keys_to_update, commit=should_commit)
# Remaining command-line options, then parse.
parser.add_argument('--just_consider_authors', action='store_true')
parser.add_argument('--limit', default=None)
args = parser.parse_args()

# Author-merge behaviour flags; only_author_merge forces
# handle_author_merge on.
skip_author_merge = args.skip_author_merge
only_author_merge = args.only_author_merge
handle_author_merge = True if only_author_merge else args.handle_author_merge

# The work-finding helpers are only imported when author merges are handled.
if handle_author_merge:
    from openlibrary.catalog.works.find_works import (
        find_title_redirects,
        find_works,
        get_books,
        books_query,
        update_works,
    )

# Client and query helper both target the server named on the command line.
ol = OpenLibrary("http://" + args.server)
set_query_host(args.server)
done_login = False

config_file = args.config
config.load(config_file)

# Log URL prefix, built from the configured infobase server.
base = 'http://%s/openlibrary.org/log/' % config.runtime_config['infobase_server']

# User filters are compared case-insensitively, so store them lower-cased.
skip_user = {u.lower() for u in args.skip_user}
only_user = {u.lower() for u in args.only_user}

# The script cannot run without a state directory.
# NOTE(review): this error path exits with status 0 - confirm that is intended.
if 'state_dir' not in config.runtime_config:
    print('state_dir missing from ' + config_file)
    sys.exit(0)
# Author-merge handling is switched off here, so the import below is
# never executed as the code stands.
handle_author_merge = False

if handle_author_merge:
    from openlibrary.catalog.works.find_works import (
        find_title_redirects, find_works, get_books, books_query, update_works,
    )

options, args = parser.parse_args()

# Client and query helper both target the server named on the command line.
ol = OpenLibrary("http://" + options.server)
set_query_host(options.server)
done_login = False

config_file = options.config
config.load(config_file)

# Log URL prefix, built from the configured infobase server.
base = "http://%s/openlibrary.org/log/" % config.runtime_config["infobase_server"]

# The script cannot run without a state directory.
# NOTE(review): this error path exits with status 0 - confirm that is intended.
if "state_dir" not in config.runtime_config:
    print("state_dir missing from " + config_file)
    sys.exit(0)

# The state file lives inside the configured state directory.
state_file = config.runtime_config["state_dir"] + "/" + options.state_file

if not exists(state_file):
    print("start point needed. do this:")
# Remaining command-line options, then parse.
parser.add_argument('--just_consider_authors', action='store_true')
parser.add_argument('--limit', default=None)
args = parser.parse_args()

# Author-merge behaviour flags; only_author_merge forces
# handle_author_merge on.
handle_author_merge = args.handle_author_merge
only_author_merge = args.only_author_merge
skip_author_merge = args.skip_author_merge

if only_author_merge:
    handle_author_merge = True

# The work-finding helpers are only imported when author merges are handled.
if handle_author_merge:
    from openlibrary.catalog.works.find_works import (
        find_title_redirects,
        find_works,
        get_books,
        books_query,
        update_works,
    )

# Client and query helper both target the server named on the command line.
ol = OpenLibrary("http://" + args.server)
set_query_host(args.server)
done_login = False

config_file = args.config
config.load(config_file)

# Solr endpoints taken from the worksearch plugin section of the config.
solr_works = config.runtime_config["plugin_worksearch"]["solr"]
solr_subjects = config.runtime_config["plugin_worksearch"]["subject_solr"]


def fix_hardcoded_config():
    """Push command-line/config values into modules that hold them as globals.

    Sets the query host on ``openlibrary.catalog.utils.query`` and the solr
    endpoints on ``openlibrary.solr.update`` from the values read above.
    """
    from openlibrary.catalog.utils import query

    # The original statement was ``query = query_host = args.server``, which
    # only rebound two local names (shadowing the module just imported) and
    # therefore changed nothing.  Assign the module attribute instead.
    # NOTE(review): assumes the module-level name is ``query_host`` - confirm.
    query.query_host = args.server

    from openlibrary.solr import update

    update.solr_works = solr_works
    update.solr_subjects = solr_subjects
from urllib import urlopen, quote_plus
from openlibrary.catalog.utils.query import withKey, set_query_host
from openlibrary.solr.update_work import update_author, update_work, get_work_subjects, add_field, solr_update, AuthorRedirect
from collections import defaultdict
from lxml.etree import tostring, Element

# Command-line interface: config file path and state file name.
parser = argparse.ArgumentParser(description='solr author merge')
parser.add_argument('--config', default='openlibrary.yml')
parser.add_argument('--state_file', default='author_merge_work_finder')
args = parser.parse_args()

config_file = args.config
config.load(config_file)

# Log URL prefix, built from the configured infobase server.
base = 'http://%s/openlibrary.org/log/' % config.runtime_config['infobase_server']

set_query_host('openlibrary.org')

# The state file lives inside the configured state directory; its first
# line holds the offset to start from.
state_file = config.runtime_config['state_dir'] + '/' + args.state_file
# Read through a context manager so the handle is closed promptly, and strip
# only a trailing newline: the previous ``[:-1]`` slice also removed the last
# real character when the line had no newline terminator.
with open(state_file) as state_fh:
    offset = state_fh.readline().rstrip('\n')

#out = open('author_merge_logs', 'w')

# Matches author keys in either the short (/a/) or long (/authors/) form and
# captures the OL...A identifier.
re_author_key = re.compile(r'^/(?:a|authors)/(OL\d+A)$')

update_times = []

to_drop = set(''';/?:@&=+$,<>#%"{}|\\^[]`\n\r''')

# Work queues filled while processing.
subjects_to_update = set()
authors_to_update = []
from openlibrary.catalog.utils.query import withKey, set_query_host from openlibrary.solr.update_work import update_author, update_work, get_work_subjects, add_field, solr_update, AuthorRedirect from collections import defaultdict from lxml.etree import tostring, Element from openlibrary.solr.update import subject_count, subject_need_update parser = argparse.ArgumentParser(description='solr author merge') parser.add_argument('--config', default='openlibrary.yml') parser.add_argument('--state_file', default='author_merge') args = parser.parse_args() config_file = args.config config.load(config_file) base = 'http://%s/openlibrary.org/log/' % config.runtime_config['infobase_server'] set_query_host('openlibrary.org') state_file = config.runtime_config['state_dir'] + '/' + args.state_file offset = open(state_file).readline()[:-1] #out = open('author_merge_logs', 'w') re_author_key = re.compile(r'^/(?:a|authors)/(OL\d+A)$') update_times = [] subjects_to_update = set() authors_to_update = [] def solr_update_authors(authors_to_update): for a in authors_to_update: