def get_results(self, q, offset=0, limit=100):
    if config.get('single_core_solr'):
        valid_fields = ['key', 'name', 'subject_type', 'work_count']
    else:
        valid_fields = ['key', 'name', 'type', 'count']
    q = escape_colon(escape_bracket(q), valid_fields)
    params = {
        "q.op": "AND",
        "q": q,
        "start": offset,
        "rows": limit,
        "fl": ",".join(valid_fields),
        "qt": "standard",
        "wt": "json",
    }
    if config.get('single_core_solr'):
        params['fq'] = 'type:subject'
        params['sort'] = 'work_count desc'
    else:
        params['sort'] = 'count desc'
    solr_select = solr_subject_select_url + "?" + urllib.urlencode(params)
    results = run_solr_search(solr_select)
    response = results['response']
    if config.get('single_core_solr'):
        response['docs'] = [self.process_doc(doc) for doc in response['docs']]
    return results
def GET(self):
    try:
        if 'counts_db' in config.admin:
            stats = admin.get_stats()
        else:
            stats = None
    except Exception:
        logger.error("Error in getting stats", exc_info=True)
        stats = None
    blog_posts = get_blog_feeds()
    lending_list = config.get("home", {}).get("lending_list")
    returncart_list = config.get("home", {}).get("returncart_list")
    user = accounts.get_current_user()
    loans = borrow.get_loans(user) if user else None
    popular_available, popular_waitlist = popular_carousel()
    return render_template(
        "home/index",
        stats=stats,
        blog_posts=blog_posts,
        lending_list=lending_list,
        returncart_list=returncart_list,
        user=user,
        loans=loans,
        popular_books=popular_available,
        waitlisted_books=popular_waitlist,
    )
def POST(self):
    if not support_db:
        return "Couldn't initialise connection to support database"
    form = web.input()
    email = form.get("email", "")
    topic = form.get("topic", "")
    description = form.get("question", "")
    url = form.get("url", "")
    user = accounts.get_current_user()
    useragent = web.ctx.env.get("HTTP_USER_AGENT", "")
    if not all([email, topic, description]):
        return ""
    c = support_db.create_case(
        creator_name=user and user.get_name() or "",
        creator_email=email,
        creator_useragent=useragent,
        creator_username=user and user.get_username() or "",
        subject=topic,
        description=description,
        url=url,
        assignee=config.get("support_case_default_address", "*****@*****.**"),
    )
    # Send an email to the creator of the case
    subject = "Case #%s: %s" % (c.caseno, topic)
    message = render_template("email/support_case", c)
    web.sendmail(
        config.get("support_case_control_address", "*****@*****.**"),
        email, subject, message,
    )
    return render_template("email/case_created", c)
def POST(self):
    i = web.input()
    f = form_talk()
    if not f.validates(i):
        return render_template("talks/submit", form=f)
    key = new_talk(i)
    if config.get('from_address') and config.get('talk_submission_contact'):
        email = render_template("talks/email", i)
        web.sendmail(
            from_address=config.from_address,
            to_address=config.talk_submission_contact,
            subject=web.safestr(email.subject.strip()),
            message=web.safestr(email),
        )
    dir = config.get("talks_dir", "/tmp/talks")
    write("%s/%s.txt" % (dir, time.time()), simplejson.dumps(i))
    tweet.tweet("talk_template", title=i.title, author=i.authors, url=web.ctx.home + "/" + key)
    add_flash_message("info", "Thanks for submitting your talk. The selection committee will review your talk and get in touch with you shortly.")
    raise web.seeother("/" + key)
def GET(self):
    i = web.input(q="", limit=5)
    i.limit = safeint(i.limit, 5)
    solr = get_authors_solr()
    name = solr.escape(i.q) + "*"
    q = 'name:(%s) OR alternate_names:(%s)' % (name, name)
    params = {
        'q_op': 'AND',
        'sort': 'work_count desc',
    }
    if config.get('single_core_solr'):
        params['fq'] = 'type:author'
    data = solr.select(q, **params)
    docs = data['docs']
    for d in docs:
        if not config.get('single_core_solr'):
            d.key = "/authors/" + d.key
        if 'top_work' in d:
            d['works'] = [d.pop('top_work')]
        else:
            d['works'] = []
        d['subjects'] = d.pop('top_subjects', [])
    return to_json(docs)
def create_connection():
    if config.get('infobase_server'):
        return client.connect(type='remote', base_url=config.infobase_server)
    elif config.get('db_parameters'):
        return client.connect(type='local', **config.db_parameters)
    else:
        raise Exception("db_parameters are not specified in the configuration")
def OLConnection():
    """Create a connection to Open Library infobase server."""
    def create_connection():
        if config.get("connection_type") == "hybrid":
            return create_hybrid_connection()
        elif config.get('infobase_server'):
            return create_remote_connection()
        elif config.get("infobase", {}).get('db_parameters'):
            return create_local_connection()
        else:
            raise Exception("db_parameters are not specified in the configuration")

    conn = create_connection()
    if config.get('memcache_servers'):
        conn = MemcacheMiddleware(conn, config.get('memcache_servers'))
    if config.get('upstream_to_www_migration'):
        conn = MigrationMiddleware(conn)
    cache_prefixes = config.get("cache_prefixes", default_cache_prefixes)
    if cache_prefixes:
        conn = LocalCacheMiddleware(conn, cache_prefixes)
    conn = IAMiddleware(conn)
    return conn
def create_connection():
    if config.get("connection_type") == "hybrid":
        return create_hybrid_connection()
    elif config.get('infobase_server'):
        return create_remote_connection()
    elif config.get("infobase", {}).get('db_parameters'):
        return create_local_connection()
    else:
        raise Exception("db_parameters are not specified in the configuration")
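# A minimal, self-contained sketch of the same selection pattern, using a plain
# dict in place of the real infogami `config` object. `choose_connection_type`
# is a hypothetical helper, not part of the codebase; it only reports which
# branch the settings above would take.
def choose_connection_type(cfg):
    if cfg.get("connection_type") == "hybrid":
        return "hybrid"
    elif cfg.get("infobase_server"):
        return "remote"
    elif cfg.get("infobase", {}).get("db_parameters"):
        return "local"
    raise Exception("db_parameters are not specified in the configuration")

# Example: a configured remote server wins over local db_parameters.
assert choose_connection_type({"infobase_server": "ib.example.org"}) == "remote"
assert choose_connection_type({"infobase": {"db_parameters": {"db": "openlibrary"}}}) == "local"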
def setup_jquery_urls():
    if config.get('use_google_cdn', True):
        jquery_url = "http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"
        jqueryui_url = "http://ajax.googleapis.com/ajax/libs/jqueryui/1.7.2/jquery-ui.min.js"
    else:
        jquery_url = "/static/upstream/js/jquery-1.3.2.min.js"
        jqueryui_url = "/static/upstream/js/jquery-ui-1.7.2.min.js"
    web.template.Template.globals['jquery_url'] = jquery_url
    web.template.Template.globals['jqueryui_url'] = jqueryui_url
    web.template.Template.globals['use_google_cdn'] = config.get('use_google_cdn', True)
def GET(self):
    try:
        stats = admin.get_stats()
    except Exception:
        stats = None
    blog_posts = get_blog_feeds()
    lending_list = config.get("home", {}).get("lending_list")
    returncart_list = config.get("home", {}).get("returncart_list")
    return render_template(
        "home/index",
        stats=stats,
        blog_posts=blog_posts,
        lending_list=lending_list,
        returncart_list=returncart_list,
    )
def POST(self):
    form = web.input()
    email = form.get("email", "")
    topic = form.get("topic", "")
    description = form.get("question", "")
    url = form.get("url", "")
    user = accounts.get_current_user()
    useragent = web.ctx.env.get("HTTP_USER_AGENT", "")
    if not all([email, topic, description]):
        return ""
    default_assignees = config.get("support_default_assignees", {})
    topic_key = str(topic.replace(" ", "_").lower())
    if topic_key in default_assignees:
        assignee = default_assignees.get(topic_key)
    else:
        assignee = default_assignees.get("default", "*****@*****.**")
    stats.increment("ol.support.all")
    subject = "Support case *%s*" % topic
    url = web.ctx.home + url
    displayname = user and user.get_name() or ""
    username = user and user.get_username() or ""
    message = SUPPORT_EMAIL_TEMPLATE % locals()
    sendmail(email, assignee, subject, message)
    return render_template("email/case_created", assignee)
def connect_to_tombstone():
    global tombstone_db
    try:
        tombstone_db_uri = config.get("celery", {})["tombstone_db"]
        tombstone_db = couchdb.Database(tombstone_db_uri)
    except Exception, e:
        logger.warning("Couldn't connect to tombstone database", exc_info=True)
def add_cover(cover_url, ekey):
    olid = ekey.split("/")[-1]
    coverstore_url = config.get('coverstore_url').rstrip('/')
    upload_url = coverstore_url + '/b/upload2'
    if upload_url.startswith("//"):
        upload_url = "{0}:{1}".format(web.ctx.get("protocol", "http"), upload_url)
    user = accounts.get_current_user()
    params = {
        'author': user.key,
        'data': None,
        'source_url': cover_url,
        'olid': olid,
        'ip': web.ctx.ip,
    }
    # initialize reply so the final check can't hit an undefined name
    reply = None
    for attempt in range(10):
        try:
            res = urllib.urlopen(upload_url, urllib.urlencode(params))
        except IOError:
            print 'retry, attempt', attempt
            sleep(2)
            continue
        body = res.read()
        if body != '':
            reply = json.loads(body)
        if res.getcode() == 200 and body != '':
            if 'id' in reply:
                break
        print 'retry, attempt', attempt
        sleep(2)
    if not reply or reply.get('message') == 'Invalid URL':
        return
    cover_id = int(reply['id'])
    return cover_id
def get_works_solr():
    if config.get('single_core_solr'):
        base_url = "http://%s/solr" % config.plugin_worksearch.get('solr')
    else:
        base_url = "http://%s/solr/works" % config.plugin_worksearch.get('solr')
    return Solr(base_url)
def _get_solr_data(self):
    if config.get("single_core_solr"):
        key = self.key
    else:
        key = self.get_olid()
    fields = [
        "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
        "has_fulltext", "lending_edition", "checked_out", "public_scan_b", "ia",
    ]
    solr = get_works_solr()
    stats.begin("solr", query={"key": key}, fields=fields)
    try:
        d = solr.select({"key": key}, fields=fields)
    finally:
        stats.end()
    if d.num_found > 0:
        w = d.docs[0]
    else:
        w = None
    # Replace _solr_data property with the attribute
    self.__dict__['_solr_data'] = w
    return w
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""
    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key", "author_name", "author_key", "title", "edition_count", "ia",
        "cover_edition_key", "has_fulltext", "subject", "ia_collection_s",
        "public_scan_b", "overdrive_s", "lending_edition_s", "lending_identifier_s",
    ]
    kw.setdefault("fields", fields)
    if config.get('single_core_solr'):
        kw.setdefault("fq", "type:work")
    query = process_work_query(query)
    solr = get_works_solr()
    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    finally:
        stats.end()
    return result
def POST(self, key):
    i = web.input(v=None, _method="GET")
    recap_plugin_active = 'recaptcha' in config.get('plugins')
    if recap_plugin_active:
        public_key = config.plugin_recaptcha.public_key
        private_key = config.plugin_recaptcha.private_key
        recap = recaptcha.Recaptcha(public_key, private_key)
        if not recap.validate():
            return 'Recaptcha solution was incorrect. Please <a href="javascript:history.back()">go back</a> and try again.'
    v = i.v and safeint(i.v, None)
    work = web.ctx.site.get(key, v)
    if work is None:
        raise web.notfound()
    try:
        helper = SaveBookHelper(work, None)
        helper.save(web.input())
        add_flash_message("info", utils.get_message("flash_work_updated"))
        raise web.seeother(work.url())
    except (ClientException, ValidationException), e:
        add_flash_message('error', str(e))
        return self.GET(key)
def work_object(w):  # called by works_by_author
    ia = w.get('ia', [])
    if config.get("single_core_solr"):
        key = w['key']
    else:
        key = '/works/' + w['key']
    obj = dict(
        authors=[web.storage(key='/authors/' + k, name=n)
                 for k, n in zip(w['author_key'], w['author_name'])],
        edition_count=w['edition_count'],
        key=key,
        title=w['title'],
        public_scan=w.get('public_scan_b', bool(ia)),
        lending_edition=w.get('lending_edition_s', ''),
        lending_identifier=w.get('lending_identifier_s', ''),
        overdrive=(w['overdrive_s'].split(';') if 'overdrive_s' in w else []),
        collections=set(w['ia_collection_s'].split(';') if 'ia_collection_s' in w else []),
        url=key + '/' + urlsafe(w['title']),
        cover_edition_key=w.get('cover_edition_key'),
        first_publish_year=(w['first_publish_year'] if 'first_publish_year' in w else None),
        ia=w.get('ia', []),
        cover_i=w.get('cover_i'),
    )
    if obj['lending_identifier']:
        doc = web.ctx.site.store.get("ebooks/" + obj['lending_identifier']) or {}
        obj['checked_out'] = doc.get("borrowed") == "true"
    else:
        obj['checked_out'] = False
    for f in 'has_fulltext', 'subtitle':
        if w.get(f):
            obj[f] = w[f]
    return web.storage(obj)
def setup():
    """Do required initialization"""
    # monkey-patch get_markdown to use OL Flavored Markdown
    view.get_markdown = get_markdown

    # Provide alternate implementations for websafe and commify
    web.websafe = websafe
    web.template.Template.FILTERS['.html'] = websafe
    web.template.Template.FILTERS['.xml'] = websafe
    web.commify = commify

    web.template.Template.globals.update({
        'HTML': HTML,
        'request': Request(),
        'logger': logging.getLogger("openlibrary.template"),
        'sum': sum,
        'get_donation_include': get_donation_include,
    })

    from openlibrary.core import helpers as h
    web.template.Template.globals.update(h.helpers)

    if config.get('use_gzip') == True:
        config.middleware.append(GZipMiddleware)
def get_schema():
    schema = dbstore.Schema()
    schema.add_table_group('type', '/type/type')
    schema.add_table_group('type', '/type/property')
    schema.add_table_group('type', '/type/backreference')
    schema.add_table_group('user', '/type/user')
    schema.add_table_group('user', '/type/usergroup')
    schema.add_table_group('user', '/type/permission')
    datatypes = ["str", "int", "ref", "boolean"]
    schema.add_table_group('edition', '/type/edition', datatypes)
    schema.add_table_group('author', '/type/author', datatypes)
    schema.add_table_group('scan', '/type/scan_location', datatypes)
    schema.add_table_group('scan', '/type/scan_record', datatypes)
    schema.add_table_group('work', '/type/work', datatypes)
    schema.add_table_group('publisher', '/type/publisher', datatypes)
    schema.add_table_group('subject', '/type/subject', datatypes)
    if 'upstream' in config.get('features', []):
        schema.add_seq('/type/edition', '/books/OL%dM')
        schema.add_seq('/type/author', '/authors/OL%dA')
    else:
        schema.add_seq('/type/edition', '/b/OL%dM')
        schema.add_seq('/type/author', '/a/OL%dA')
    schema.add_seq('/type/work', '/works/OL%dW')
    schema.add_seq('/type/publisher', '/publishers/OL%dP')
    return schema
def _editions_view(self, seeds, **kw):
    reverse = str(kw.pop("reverse", "")).lower()
    if 'sort' in kw and reverse == "true":
        # sort=\field is couchdb-lucene's way of saying ORDER BY field DESC
        kw['sort'] = '\\' + kw['sort']
    view_url = config.get("lists", {}).get("editions_view")
    if not view_url:
        return {}

    def escape(value):
        special_chars = '+-&|!(){}[]^"~*?:\\'
        pattern = "([%s])" % re.escape(special_chars)
        quote = '"'
        return quote + web.re_compile(pattern).sub(r'\\\1', value) + quote

    q = " OR ".join("seed:" + escape(seed.encode('utf-8')) for seed in seeds)
    url = view_url + "?" + urllib.urlencode(dict(kw, q=q))
    stats.begin("couchdb", url=url)
    try:
        json = urllib2.urlopen(url).read()
    finally:
        stats.end()
    return simplejson.loads(json)
def POST(self):
    i = web.input('email', 'password', 'username', agreement="no")
    i.displayname = i.get('displayname') or i.username
    recap_plugin_active = 'recaptcha' in config.get('plugins')
    if recap_plugin_active:
        public_key = config.plugin_recaptcha.public_key
        private_key = config.plugin_recaptcha.private_key
        recap = recaptcha.Recaptcha(public_key, private_key)
        if not recap.validate():
            return 'Recaptcha solution was incorrect. Please <a href="javascript:history.back()">go back</a> and try again.'
    f = forms.Register()
    if not f.validates(i):
        return render['account/create'](f)
    if i.agreement != "yes":
        f.note = utils.get_error("account_create_tos_not_selected")
        return render['account/create'](f)
    try:
        accounts.register(
            username=i.username,
            email=i.email,
            password=i.password,
            displayname=i.displayname,
        )
    except ClientException, e:
        f.note = str(e)
        return render['account/create'](f)
def _is_valid_item(self, itemid, metadata):
    # Not a book, or scan not complete or no images uploaded
    if metadata.get("mediatype") != "texts" \
            or metadata.get("repub_state", "4") not in ["4", "6"] \
            or "imagecount" not in metadata:
        return False

    # items starting with these prefixes are not books
    ignore_prefixes = config.get("ia_ignore_prefixes", [])
    for prefix in ignore_prefixes:
        # ignore all JSTOR items
        if itemid.startswith(prefix):
            return False

    # Anand - Oct 2013
    # If an item has noindex=true and is not marked as lending or printdisabled,
    # ignore it. It would have been marked noindex=true for some reason.
    collections = metadata.get("collection", [])
    if not isinstance(collections, list):
        collections = [collections]
    if metadata.get("noindex") == "true" \
            and "printdisabled" not in collections \
            and "inlibrary" not in collections \
            and "lendinglibrary" not in collections:
        return False

    return True
def GET(self):
    i = web.input(name='')
    logname = i.name
    filepath = config.get('errorlog', 'errors') + '/' + logname + '.html'
    if os.path.exists(filepath):
        with open(filepath) as f:
            return f.read()
def GET(self):
    try:
        if "counts_db" in config.admin:
            stats = admin.get_stats()
        else:
            stats = None
    except Exception:
        logger.error("Error in getting stats", exc_info=True)
        stats = None
    blog_posts = get_blog_feeds()
    lending_list = config.get("home", {}).get("lending_list")
    returncart_list = config.get("home", {}).get("returncart_list")
    return render_template(
        "home/index",
        stats=stats,
        blog_posts=blog_posts,
        lending_list=lending_list,
        returncart_list=returncart_list,
    )
def memcache(self):
    servers = config.get("memcache_servers", None)
    if servers:
        return olmemcache.Client(servers)
    else:
        web.debug("Could not find memcache_servers in the configuration. Used dummy memcache.")
        import mockcache
        return mockcache.Client()
def __init__(self, region=None, library=None, collection=None, subject=None):
    self.base_url = "http://%s/solr" % config.get("stats_solr")
    self.region = region
    self.library = library
    self.collection = collection
    self.subject = subject
    self._library_titles = None
    self._facet_counts = None
def _updates_view(self, **kw):
    view_url = config.get("lists", {}).get("updates_view")
    if not view_url:
        return []
    kw['stale'] = 'ok'
    view = couchdb.client.PermanentView(view_url, "updates_view")
    return view(**kw)
def get_results(q, offset=0, limit=100):
    q = escape_bracket(q)
    solr_select = solr_edition_select_url + "?q.op=AND&q=%s&start=%d&rows=%d&fl=*&qt=standard&wt=json" % (
        web.urlquote(q), offset, limit)
    if config.get('single_core_solr'):
        solr_select += '&fq=type:edition'
    return run_solr_search(solr_select)
def GET(self):
    servers = config.get("plugin_admin", {}).get("webservers", [])
    if servers:
        body = "".join(self.reload(servers))
    else:
        body = "No webservers specified in the configuration file."
    return render_template("message", "Reload", body)
def get_ia_db():
    """Metadata API is slow.

    Talk to archive.org database directly if it is specified in the configuration.
    """
    if not config.get("ia_db"):
        return

    global _ia_db
    if not _ia_db:
        settings = config.ia_db
        host = settings['host']
        db = settings['db']
        user = settings['user']
        pw = os.popen(settings['pw_file']).read().strip()
        _ia_db = web.database(dbn="postgres", host=host, db=db, user=user, pw=pw)
    return _ia_db
def _get_memcache(self):
    if self._memcache is None:
        servers = config.get("memcache_servers")
        if servers:
            self._memcache = memcache.Client(servers)
        else:
            web.debug(
                "Could not find memcache_servers in the configuration. Used dummy memcache."
            )
            try:
                import mockcache  # Only supports legacy Python
                self._memcache = mockcache.Client()
            except ImportError:  # Python 3
                from pymemcache.test.utils import MockMemcacheClient
                self._memcache = MockMemcacheClient()
    return self._memcache
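# A minimal sketch of the same fallback idea: when no memcache servers are
# configured, degrade gracefully to a tiny in-memory stub. `SimpleCache` and
# `make_cache` are hypothetical stand-ins for illustration, not Open Library code.
class SimpleCache(object):
    """Dict-backed stand-in exposing the get/set subset used above."""

    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data.get(key)

    def set(self, key, value, time=0):
        self._data[key] = value
        return True


def make_cache(cfg):
    # With servers configured, the functions above construct memcache.Client(servers);
    # this sketch only covers the no-servers fallback path.
    if cfg.get("memcache_servers"):
        raise NotImplementedError("real memcache client omitted from this sketch")
    return SimpleCache()


cache = make_cache({})
cache.set("greeting", "hello")
assert cache.get("greeting") == "hello"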
def add_cover(cover_url, ekey, account=None):
    """
    Adds a cover to coverstore and returns the cover id.

    :param str cover_url: URL of cover image
    :param str ekey: Edition key /book/OL..M
    :rtype: int
    :return: Cover id
    """
    olid = ekey.split("/")[-1]
    coverstore_url = config.get('coverstore_url').rstrip('/')
    upload_url = coverstore_url + '/b/upload2'
    if upload_url.startswith("//"):
        upload_url = "{0}:{1}".format(web.ctx.get("protocol", "http"), upload_url)
    user = account or accounts.get_current_user()
    params = {
        'author': user.get('key') or user.get('_key'),
        'data': None,
        'source_url': cover_url,
        'olid': olid,
        'ip': web.ctx.ip,
    }
    reply = None
    for attempt in range(10):
        try:
            res = urllib.urlopen(upload_url, urllib.urlencode(params))
        except IOError:
            print('retry, attempt', attempt)
            sleep(2)
            continue
        body = res.read()
        if res.getcode() == 500:
            raise CoverNotSaved(body)
        if body not in ['', 'None']:
            reply = json.loads(body)
            if res.getcode() == 200 and 'id' in reply:
                break
        print('retry, attempt', attempt)
        sleep(2)
    if not reply or reply.get('message') == 'Invalid URL':
        return
    cover_id = int(reply['id'])
    return cover_id
def POST(self):
    # if not support_db:
    #     return "Couldn't initialise connection to support database"
    form = web.input()
    email = form.get("email", "")
    topic = form.get("topic", "")
    description = form.get("question", "")
    url = form.get("url", "")
    user = accounts.get_current_user()
    useragent = web.ctx.env.get("HTTP_USER_AGENT", "")
    if not all([email, topic, description]):
        return ""
    default_assignees = config.get("support_default_assignees", {})
    topic_key = str(topic.replace(" ", "_").lower())
    if topic_key in default_assignees:
        # This is set to False to prevent cases from being created
        # even if there is a designated assignee. This prevents
        # the database from being updated.
        create_case = False
        assignee = default_assignees.get(topic_key)
    else:
        create_case = False
        assignee = default_assignees.get("default", "*****@*****.**")
    if create_case:
        c = support_db.create_case(
            creator_name=user and user.get_name() or "",
            creator_email=email,
            creator_useragent=useragent,
            creator_username=user and user.get_username() or "",
            subject=topic,
            description=description,
            url=url,
            assignee=assignee,
        )
        stats.increment("support.all")
    else:
        stats.increment("support.all")
    subject = "Support case *%s*" % topic
    message = "A new support case has been filed\n\nTopic: %s\n\nDescription:\n%s" % (
        topic, description)
    web.sendmail(email, assignee, subject, message)
    return render_template("email/case_created", assignee)
def POST(self):
    form = web.input()
    email = form.get("email", "")
    topic = form.get("topic", "")
    description = form.get("question", "")
    url = form.get("url", "")
    user = accounts.get_current_user()
    useragent = web.ctx.env.get("HTTP_USER_AGENT", "")
    if not all([email, topic, description]):
        return ""
    hashed_ip = hashlib.md5(web.ctx.ip.encode('utf-8')).hexdigest()
    has_emailed_recently = get_memcache().get('contact-POST-%s' % hashed_ip)
    if has_emailed_recently:
        recap = get_recaptcha()
        if recap and not recap.validate():
            return render_template(
                "message.html",
                'Recaptcha solution was incorrect',
                ('Please <a href="javascript:history.back()">go back</a> and try '
                 'again.'))
    default_assignees = config.get("support_default_assignees", {})
    topic_key = str(topic.replace(" ", "_").lower())
    if topic_key in default_assignees:
        assignee = default_assignees.get(topic_key)
    else:
        assignee = default_assignees.get("default", "*****@*****.**")
    stats.increment("ol.support.all")
    subject = "Support case *%s*" % topic
    url = web.ctx.home + url
    displayname = user and user.get_name() or ""
    username = user and user.get_username() or ""
    message = SUPPORT_EMAIL_TEMPLATE % locals()
    sendmail(email, assignee, subject, message)
    get_memcache().set('contact-POST-%s' % hashed_ip, "true", time=15 * MINUTE_SECS)
    return render_template("email/case_created", assignee)
def get_item_status(cls, itemid, metadata):
    """Returns the status of the item related to importing it in OL.

    Possible return values are:

    * ok
    * not-texts-item
    * bad-repub-state
    * no-imagecount
    * prefix-blacklisted
    * noindex-true
    """
    # Not a book, or scan not complete or no images uploaded
    if metadata.get("mediatype") != "texts":
        return "not-texts-item"
    if metadata.get("repub_state", "4") not in ["4", "6"]:
        return "bad-repub-state"
    if "imagecount" not in metadata:
        return "no-imagecount"

    # items starting with these prefixes are not books
    ignore_prefixes = config.get("ia_ignore_prefixes", [])
    for prefix in ignore_prefixes:
        # ignore all JSTOR items
        if itemid.startswith(prefix):
            return "prefix-blacklisted"

    # Anand - Oct 2013
    # If an item has noindex=true and is not marked as lending or printdisabled,
    # ignore it. It would have been marked noindex=true for some reason.
    collections = metadata.get("collection", [])
    if not isinstance(collections, list):
        collections = [collections]
    if metadata.get("noindex") == "true" \
            and "printdisabled" not in collections \
            and "inlibrary" not in collections \
            and "lendinglibrary" not in collections:
        return "noindex-true"

    return "ok"
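# A self-contained restatement of the checks above as a plain function, handy
# for exercising the rules in isolation. The function name and the sample
# metadata dicts below are made up for illustration; only the rule order and
# return values mirror get_item_status.
def item_status(itemid, metadata, ignore_prefixes=()):
    if metadata.get("mediatype") != "texts":
        return "not-texts-item"
    if metadata.get("repub_state", "4") not in ["4", "6"]:
        return "bad-repub-state"
    if "imagecount" not in metadata:
        return "no-imagecount"
    if any(itemid.startswith(prefix) for prefix in ignore_prefixes):
        return "prefix-blacklisted"
    collections = metadata.get("collection", [])
    if not isinstance(collections, list):
        collections = [collections]
    if metadata.get("noindex") == "true" and not (
            {"printdisabled", "inlibrary", "lendinglibrary"} & set(collections)):
        return "noindex-true"
    return "ok"

assert item_status("examplebook00", {"mediatype": "texts", "imagecount": "120"}) == "ok"
assert item_status("jstor-12345", {"mediatype": "texts", "imagecount": "10"},
                   ignore_prefixes=["jstor-"]) == "prefix-blacklisted"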
def setup_ol_config(openlibrary_config_file):
    """Setup OL configuration. Required for storing counts in store."""
    import infogami
    from infogami import config

    config.plugin_path += ['openlibrary.plugins']
    config.site = "openlibrary.org"

    infogami.load_config(openlibrary_config_file)
    infogami.config.infobase_parameters = dict(type="ol")

    if config.get("infobase_config_file"):
        dir = os.path.dirname(openlibrary_config_file)
        path = os.path.join(dir, config.infobase_config_file)
        config.infobase = yaml.safe_load(open(path).read())

    infogami._setup()
def GET(self, key):
    i = web.input(v=None, _method="GET")
    v = i.v and safeint(i.v, None)
    if not web.ctx.site.can_write(key):
        return render_template(
            "permission_denied", web.ctx.fullpath,
            "Permission denied to edit " + key + ".")
    work = web.ctx.site.get(key, v)
    if work is None:
        raise web.notfound()
    recap_plugin_active = 'recaptcha' in config.get('plugins')
    if recap_plugin_active:
        public_key = config.plugin_recaptcha.public_key
        private_key = config.plugin_recaptcha.private_key
        recap = recaptcha.Recaptcha(public_key, private_key)
    else:
        recap = None
    return render_template('books/edit', work, recaptcha=recap)
def main():
    if "--config" in sys.argv:
        index = sys.argv.index("--config")
        configfile = sys.argv[index + 1]
        del sys.argv[index:index + 2]
    else:
        import os
        configfile = os.path.abspath(os.path.join(
            os.path.dirname(__file__), os.pardir, os.pardir,
            'openlibrary', 'conf', 'openlibrary.yml'))
    load_config(configfile)
    from infogami import config

    cmd = sys.argv[1]
    args, flags = [], {'servername': config.get('servername', 'https://openlibrary.org')}
    for i in sys.argv[2:]:
        if i.startswith('--'):
            flags[i[2:]] = True
        else:
            args.append(i)

    if cmd == "import-retro":
        start, stop = (int(a) for a in args) if (args and len(args) == 2) else (None, None)
        return retroactive_import(start=start, stop=stop, servername=flags['servername'])
    if cmd == "import-ocaids":
        return import_ocaids(*args, **flags)
    if cmd == "add-items":
        return add_items(args)
    elif cmd == "add-new-scans":
        return add_new_scans(args)
    elif cmd == "import-batch":
        return import_batch(args, **flags)
    elif cmd == "import-all":
        return import_all(args, **flags)
    elif cmd == "import-item":
        return import_item(args, **flags)
    else:
        logger.error("Unknown command: %s", cmd)
def _load():
    """Imports the files from the plugins directories and loads templates."""
    global plugins

    plugins = [_make_plugin('core')]

    if config.plugins is not None:
        plugins += [_make_plugin(p) for p in config.plugins]
    else:
        for p in config.plugin_path:
            m = __import__(p)
            root = os.path.dirname(m.__file__)
            plugins += _list_plugins(root)

    for plugin in plugins:
        template.load_templates(plugin.path, lazy=True)
        macro.load_macros(plugin.path, lazy=True)
        i18n.load_strings(plugin.path)
        __import__(plugin.module + '.code', globals(), locals(), ['plugins'])

    features.set_feature_flags(config.get("features", {}))
def get_ia_db(configfile=None):
    """Metadata API is slow.

    Talk to archive.org database directly if it is specified in the global
    configuration or if a configfile is provided.
    """
    if configfile:
        from openlibrary.config import load_config
        load_config(configfile)

    if not config.get("ia_db"):
        return None

    global _ia_db
    if not _ia_db:
        settings = config.ia_db
        host = settings['host']
        db = settings['db']
        user = settings['user']
        pw = os.popen(settings['pw_file']).read().strip()
        _ia_db = web.database(dbn="postgres", host=host, db=db, user=user, pw=pw)
    return _ia_db
def work_object(w):  # called by works_by_author
    ia = w.get('ia', [])
    if config.get("single_core_solr"):
        key = w['key']
    else:
        key = '/works/' + w['key']
    obj = dict(
        authors=[
            web.storage(key='/authors/' + k, name=n)
            for k, n in zip(w['author_key'], w['author_name'])
        ],
        edition_count=w['edition_count'],
        key=key,
        title=w['title'],
        public_scan=w.get('public_scan_b', bool(ia)),
        lending_edition=w.get('lending_edition_s', ''),
        lending_identifier=w.get('lending_identifier_s', ''),
        overdrive=(w['overdrive_s'].split(';') if 'overdrive_s' in w else []),
        collections=set(w['ia_collection_s'].split(';') if 'ia_collection_s' in w else []),
        url=key + '/' + urlsafe(w['title']),
        cover_edition_key=w.get('cover_edition_key'),
        first_publish_year=(w['first_publish_year'] if 'first_publish_year' in w else None),
        ia=w.get('ia', []),
        cover_i=w.get('cover_i'))
    if obj['lending_identifier']:
        doc = web.ctx.site.store.get("ebooks/" + obj['lending_identifier']) or {}
        obj['checked_out'] = doc.get("borrowed") == "true"
    else:
        obj['checked_out'] = False
    for f in 'has_fulltext', 'subtitle':
        if w.get(f):
            obj[f] = w[f]
    return web.storage(obj)
def POST(self):
    i = web.input()
    seq = web.ctx.site.seq.next_value("libraries")
    doc = dict(i)
    doc.update({
        "_key": "libraries/pending-%d" % seq,
        "type": "library",
        "current_status": "pending",
        "registered_on": datetime.datetime.utcnow().isoformat(),
    })
    web.ctx.site.store[doc['_key']] = doc
    self.sendmail(i.contact_email, render_template("libraries/email_confirmation"))
    if config.get("libraries_admin_email"):
        self.sendmail(config.libraries_admin_email,
                      render_template("libraries/email_notification", i))
    return render_template("libraries/postadd")
def _setup():
    # if config.db_parameters is None:
    #     raise Exception('infogami.config.db_parameters is not specified')
    if config.site is None:
        raise Exception('infogami.config.site is not specified')

    if config.bugfixer:
        web.webapi.internalerror = web.emailerrors(config.bugfixer, web.debugerror)
        web.internalerror = web.webapi.internalerror

    web.config.db_parameters = config.db_parameters
    web.config.db_printing = config.db_printing

    if config.get("debug", None) is not None:
        web.config.debug = config.debug

    from infogami.utils import delegate
    delegate._load()

    # setup context etc.
    delegate.fakeload()
def setup():
    """Do required initialization"""
    # monkey-patch get_markdown to use OL Flavored Markdown
    view.get_markdown = get_markdown

    # Provide alternate implementations for websafe and commify
    web.websafe = websafe
    web.template.Template.FILTERS['.html'] = websafe
    web.template.Template.FILTERS['.xml'] = websafe
    web.commify = commify

    web.template.Template.globals.update({
        'HTML': HTML,
        'request': Request(),
    })

    from openlibrary.core import helpers as h
    web.template.Template.globals.update(h.helpers)

    if config.get('use_gzip') == True:
        config.middleware.append(GZipMiddleware)
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""
    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key", "author_name", "author_key", "title", "edition_count", "ia",
        "cover_edition_key", "has_fulltext", "subject", "ia_collection_s",
        "public_scan_b", "overdrive_s", "lending_edition_s", "lending_identifier_s",
    ]
    kw.setdefault("fields", fields)
    if config.get('single_core_solr'):
        kw.setdefault("fq", "type:work")
    query = process_work_query(query)
    solr = get_works_solr()
    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    except Exception as e:
        logging.getLogger("openlibrary").exception("Failed solr query")
        return None
    finally:
        stats.end()
    return result
def POST(self):
    i = web.input(title="", author_name="", author_key="", publisher="",
                  publish_date="", id_name="", id_value="", _test="false")

    recap_plugin_active = 'recaptcha' in config.get('plugins')
    if recap_plugin_active and not web.ctx.site.get_user():
        public_key = config.plugin_recaptcha.public_key
        private_key = config.plugin_recaptcha.private_key
        recap = recaptcha.Recaptcha(public_key, private_key)
        if not recap.validate():
            return 'Recaptcha solution was incorrect. Please <a href="javascript:history.back()">go back</a> and try again.'

    saveutil = DocSaveHelper()
    match = self.find_matches(saveutil, i)

    if i._test == "true" and not isinstance(match, list):
        if match:
            return 'Matched <a href="%s">%s</a>' % (match.key, match.key)
        else:
            return 'No match found'

    if isinstance(match, list):
        # multiple matches
        return render_template("books/check", i, match)
    elif match and match.key.startswith('/books'):
        # work match and edition match
        return self.work_edition_match(match)
    elif match and match.key.startswith('/works'):
        # work match but not edition
        work = match
        return self.work_match(saveutil, work, i)
    else:
        # no match
        return self.no_match(saveutil, i)
def GET(self, key):
    i = web.input(v=None)
    v = i.v and safeint(i.v, None)
    if not web.ctx.site.can_write(key):
        return render_template(
            "permission_denied", web.ctx.fullpath,
            "Permission denied to edit " + key + ".")
    edition = web.ctx.site.get(key, v)
    if edition is None:
        raise web.notfound()
    work = edition.works and edition.works[0]
    # HACK: create dummy work when work is not available to make edit form work
    work = work or web.ctx.site.new('', {
        'key': '',
        'type': {'key': '/type/work'},
        'title': edition.title,
    })
    recap_plugin_active = 'recaptcha' in config.get('plugins')

    # check to see if account is more than two years old
    old_user = False
    user = web.ctx.site.get_user()
    account = user and user.get_account()
    if account:
        create_dt = account.creation_time()
        now_dt = datetime.datetime.utcnow()
        delta = now_dt - create_dt
        if delta.days > 365 * 2:
            old_user = True

    if recap_plugin_active and not old_user:
        public_key = config.plugin_recaptcha.public_key
        private_key = config.plugin_recaptcha.private_key
        recap = recaptcha.Recaptcha(public_key, private_key)
    else:
        recap = None
    return render_template('books/edit', work, edition, recaptcha=recap)
def _sendmail(to, msg, cc=None, frm=None):
    cc = cc or []
    frm = frm or config.from_address
    if config.get('dummy_sendmail'):
        message = (
            'To: ' + to + '\n' +
            'From:' + config.from_address + '\n' +
            'Subject:' + msg.subject + '\n' +
            '\n' +
            web.safestr(msg)
        )
        print("sending email", message, file=web.debug)
    else:
        web.sendmail(
            frm, to,
            subject=msg.subject.strip(),
            message=web.safestr(msg),
            cc=cc,
        )
from openlibrary.core import ia
from infogami import config  # needed for the config.get() lookup below

import web
import base64
import json
import re
import urllib

import import_opds
import import_rdf
import import_edition_builder
from lxml import etree
import logging

IA_BASE_URL = config.get('ia_base_url')

logger = logging.getLogger("openlibrary.importapi")


class DataError(ValueError):
    pass


def parse_meta_headers(edition_builder):
    # parse S3-style http headers
    # we don't yet support augmenting complex fields like author or language
    # string_keys = ['title', 'title_prefix', 'description']
    re_meta = re.compile(r'HTTP_X_ARCHIVE_META(?:\d{2})?_(.*)')
    for k, v in web.ctx.env.items():
        m = re_meta.match(k)
        if m:
            meta_key = m.group(1).lower()
def get_to():
    if config.get('plugin_scod') is not None:
        return config.plugin_scod.get('email_recipients', [])
    else:
        return config.get('scan_email_recipients', [])
def bookreader_host():
    return config.get('bookreader_host', '')
def get_admin_couchdb():
    db_url = config.get("admin", {}).get("counts_db")
    return db_url and couchdb.Database(db_url)
def get_doc(doc):  # called from work_search template
    e_ia = doc.find("arr[@name='ia']")

    first_pub = None
    e_first_pub = doc.find("int[@name='first_publish_year']")
    if e_first_pub is not None:
        first_pub = e_first_pub.text
    e_first_edition = doc.find("str[@name='first_edition']")
    first_edition = None
    if e_first_edition is not None:
        first_edition = e_first_edition.text

    work_subtitle = None
    e_subtitle = doc.find("str[@name='subtitle']")
    if e_subtitle is not None:
        work_subtitle = e_subtitle.text

    if doc.find("arr[@name='author_key']") is None:
        assert doc.find("arr[@name='author_name']") is None
        authors = []
    else:
        ak = [e.text for e in doc.find("arr[@name='author_key']")]
        an = [e.text for e in doc.find("arr[@name='author_name']")]
        authors = [
            web.storage(
                key=key,
                name=name,
                url="/authors/%s/%s" % (key, (urlsafe(name) if name is not None else 'noname')),
            )
            for key, name in zip(ak, an)
        ]

    cover = doc.find("str[@name='cover_edition_key']")
    e_public_scan = doc.find("bool[@name='public_scan_b']")
    e_lending_edition = doc.find("str[@name='lending_edition_s']")
    e_lending_identifier = doc.find("str[@name='lending_identifier_s']")
    e_collection = doc.find("str[@name='ia_collection_s']")
    collections = set()
    if e_collection is not None:
        collections = set(e_collection.text.split(';'))

    doc = web.storage(
        key=doc.find("str[@name='key']").text,
        title=doc.find("str[@name='title']").text,
        edition_count=int(doc.find("int[@name='edition_count']").text),
        ia=[e.text for e in (e_ia if e_ia is not None else [])],
        has_fulltext=(doc.find("bool[@name='has_fulltext']").text == 'true'),
        public_scan=((e_public_scan.text == 'true') if e_public_scan is not None else (e_ia is not None)),
        lending_edition=(e_lending_edition.text if e_lending_edition is not None else None),
        lending_identifier=(e_lending_identifier and e_lending_identifier.text),
        collections=collections,
        authors=authors,
        first_publish_year=first_pub,
        first_edition=first_edition,
        subtitle=work_subtitle,
        cover_edition_key=(cover.text if cover is not None else None),
    )

    if config.get("single_core_solr"):
        doc.url = doc.key + '/' + urlsafe(doc.title)
    else:
        doc.url = '/works/' + doc.key + '/' + urlsafe(doc.title)

    if not doc.public_scan and doc.lending_identifier:
        store_doc = web.ctx.site.store.get("ebooks/" + doc.lending_identifier) or {}
        doc.checked_out = store_doc.get("borrowed") == "true"
    elif not doc.public_scan and doc.lending_edition:
        store_doc = web.ctx.site.store.get("ebooks/books/" + doc.lending_edition) or {}
        doc.checked_out = store_doc.get("borrowed") == "true"
    else:
        doc.checked_out = "false"
    return doc
def run_solr_query(param={}, rows=100, page=1, sort=None, spellcheck_count=None,
                   offset=None, fields=None):
    # called by do_search
    if spellcheck_count is None:
        spellcheck_count = default_spellcheck_count

    # use page when offset is not specified
    if offset is None:
        offset = rows * (page - 1)

    (q_list, use_dismax) = build_q_list(param)

    if fields is None:
        fields = [
            'key', 'author_name', 'author_key', 'title', 'subtitle',
            'edition_count', 'ia', 'has_fulltext', 'first_publish_year',
            'cover_i', 'cover_edition_key', 'public_scan_b',
            'lending_edition_s', 'ia_collection_s',
        ]
    fl = ','.join(fields)

    solr_select = solr_select_url + "?q.op=AND&start=%d&rows=%d&fl=%s" % (offset, rows, fl)
    if q_list:
        if use_dismax:
            q = web.urlquote(' '.join(q_list))
            solr_select += "&defType=dismax&qf=text+title^5+author_name^5&bf=sqrt(edition_count)^10"
        else:
            q = web.urlquote(' '.join(q_list + ['_val_:"sqrt(edition_count)"^10']))
        solr_select += "&q=%s" % q

    solr_select += '&spellcheck=true&spellcheck.count=%d' % spellcheck_count
    solr_select += "&facet=true&" + '&'.join("facet.field=" + f for f in facet_fields)

    if 'public_scan' in param:
        v = param.pop('public_scan').lower()
        if v in ('true', 'false'):
            if v == 'false':
                # also constrain on print disabled since the index may not be in sync
                param.setdefault('print_disabled', 'false')
            solr_select += '&fq=public_scan_b:%s' % v

    if 'print_disabled' in param:
        v = param.pop('print_disabled').lower()
        if v in ('true', 'false'):
            solr_select += '&fq=%ssubject_key:protected_daisy' % ('-' if v == 'false' else '')

    k = 'has_fulltext'
    if k in param:
        v = param[k].lower()
        if v in ('true', 'false'):
            param[k] = v
            solr_select += '&fq=%s:%s' % (k, v)
        else:
            del param[k]

    for k in facet_list_fields:
        if k == 'author_facet':
            k = 'author_key'
        if k not in param:
            continue
        v = param[k]
        solr_select += ''.join('&fq=%s:"%s"' % (k, url_quote(l)) for l in v if l)

    if sort:
        solr_select += "&sort=" + url_quote(sort)
    solr_select += '&wt=' + url_quote(param.get('wt', 'standard'))

    # For single-core solr, filter the results by type:work
    if config.get("single_core_solr"):
        solr_select += "&fq=type:work"

    solr_result = execute_solr_query(solr_select)
    if solr_result is None:
        return (None, None, None)
    reply = solr_result.read()
    return (reply, solr_select, q_list)
        except memcache.Client.MemcachedKeyError:
            return None
        return value and self.decompress(value)

    def get_multi(self, keys):
        keys = [adapter.convert_key(k) for k in keys]
        keys = [web.safestr(k) for k in keys]
        d = self._client.get_multi(keys)
        return dict(
            (web.safeunicode(adapter.unconvert_key(k)), self.decompress(v))
            for k, v in d.items())


if config.get('upstream_memcache_servers'):
    olmemcache.Client = UpstreamMemcacheClient
    # set config.memcache_servers only after olmemcache.Client is updated
    config.memcache_servers = config.upstream_memcache_servers


def _get_recent_changes():
    site = web.ctx.get('site') or delegate.create_site()
    web.ctx.setdefault("ip", "127.0.0.1")

    # The recentchanges can have multiple revisions for a document if it has been
    # modified more than once. Take only the most recent revision in that case.
    visited = set()

    def is_visited(key):
        if key in visited:
def get_coverstore_url():
    return config.get('coverstore_url', 'https://covers.openlibrary.org').rstrip('/')
from openlibrary.accounts.model import OpenLibraryAccount
from openlibrary import accounts
from openlibrary.core import ab
from lxml import etree
import acs4

logger = logging.getLogger("openlibrary.borrow")

########## Constants

lending_library_subject = u'Lending library'
in_library_subject = u'In library'
lending_subjects = set([lending_library_subject, in_library_subject])
loanstatus_url = config.get('loanstatus_url')

# ACS4 resource ids start with 'urn:uuid:'. The meta.xml on archive.org
# adds 'acs:epub:' or 'acs:pdf:' to distinguish the file type.
acs_resource_id_prefixes = ['urn:uuid:', 'acs:epub:', 'acs:pdf:']

# Max loans a user can have at once
user_max_loans = 5

# When we generate a loan offer (.acsm) for a user we assume that the loan has occurred.
# Only once the loan is fulfilled inside Digital Editions does the book status server
# know the loan has occurred. We allow this timeout so that we don't delete the OL loan
# record before fulfillment just because we can't find it in the book status server.
# $$$ If a user borrows an ACS4 book and immediately returns it, the loan will show as
# "not yet downloaded" for the duration of the timeout.
# BookReader loan status is always current.
def get_coverstore_url():
    """Returns the base url of coverstore by looking at the config."""
    return config.get('coverstore_url', 'http://covers.openlibrary.org').rstrip('/')
def affiliate_id(affiliate):
    return config.get('affiliate_ids', {}).get(affiliate, '')
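# The nested-default lookup above (and in the "home" and "lists" lookups earlier)
# works against any mapping; a quick sketch with a plain dict. The helper name
# and the sample affiliate id are invented for illustration.
def affiliate_id_from(cfg, affiliate):
    """Return the configured affiliate id, or '' when nothing is configured."""
    return cfg.get('affiliate_ids', {}).get(affiliate, '')

assert affiliate_id_from({}, 'amazon') == ''
assert affiliate_id_from({'affiliate_ids': {'amazon': 'example-20'}}, 'amazon') == 'example-20'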