def _cache_parsed_xml(self):
    """Cache every journal and publisher found in the XML."""
    if not self.parsed:
        return
    outcome = self.xml['header']['outcome']
    if outcome == 'notFound' or outcome == 'failed':
        return
    if outcome == 'singleJournal' or outcome == 'uniqueZetoc':
        journal = self.xml['journals']['journal']
        cache.set("journal:" + journal['jtitle'].lower(), journal,
                  999999999999)
        if outcome != 'uniqueZetoc':
            # the publisher has been indexed by RoMEO;
            # associate the journal with a publisher key in the cache
            publisher = self.xml['publishers']['publisher']
            cache.set("journal-publisher:" + journal['jtitle'].lower(),
                      "publisher:" + publisher['name'].lower(),
                      999999999999)
    elif self.xml['journals'] is not None:
        for journal in self.xml['journals']['journal']:
            cache.set("journal:" + journal['jtitle'].lower(), journal,
                      999999999999)
    if self.xml['header']['numhits'] == '1' and outcome != 'uniqueZetoc':
        publisher = self.xml['publishers']['publisher']
        cache.set("publisher:" + publisher['name'].lower(), publisher,
                  999999999999)
    elif self.xml['publishers'] is not None:
        for publisher in self.xml['publishers']['publisher']:
            cache.set("publisher:" + publisher['name'].lower(), publisher,
                      None)
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful, caches the workflow object id that
    corresponds to the uploaded record, so the workflow can be resumed
    when webcoll finishes processing that record.

    If robotupload encountered an error, sends an email informing the
    site administrator about the error.
    """
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records") or dict()
            pending_records[str(recid)] = str(id_object)
            cache.set("pending_records", pending_records,
                      timeout=cfg["PENDING_RECORDS_CACHE_TIMEOUT"])
        else:
            from invenio.ext.email import send_email
            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                cfg["CFG_SITE_SUPPORT_EMAIL"],
                cfg["CFG_SITE_ADMIN_EMAIL"],
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
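# A minimal, self-contained sketch (not from the source) of how the
# "pending_records" entry above round-trips through the cache. The
# SimpleCache class is a stand-in; any backend exposing get() and
# set(key, value, timeout=...) behaves the same way.
class SimpleCache(object):
    """Tiny in-memory stand-in for the Flask cache object (timeout ignored)."""

    def __init__(self):
        self._store = {}

    def get(self, key):
        return self._store.get(key)

    def set(self, key, value, timeout=None):
        self._store[key] = value


cache = SimpleCache()

# Simulate the successful-upload branch of robotupload_callback().
pending_records = cache.get("pending_records") or dict()
pending_records[str(12345)] = str("workflow-object-42")
cache.set("pending_records", pending_records, timeout=3600)
assert cache.get("pending_records") == {"12345": "workflow-object-42"}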
def save(self):
    """Save modified data permanently for logged users."""
    if not self.is_guest and self.modified:
        timeout = current_app.config.get(
            'CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT', 0) * 3600
        cache.set(self.get_key(), dict(self.info), timeout=timeout)
def set_single_item(self, journal=None, publisher=None):
    """Initialize the parser with items retrieved from the cache.

    Note: if both a journal and a publisher are given, the publisher
    is associated with the journal.
    """
    self.xml = dict()
    self.xml['header'] = dict()
    self.xml['header']['outcome'] = ''
    if journal is not None:
        self.xml['journals'] = dict()
        self.xml['journals']['journal'] = journal
        self.xml['header']['numhits'] = '1'
        self.parsed = True
        self.single_item = True
        if publisher is not None:
            # Associate the journal with a publisher key in the cache
            self.xml['header']['outcome'] = 'singleJournal'
            cache.set("journal-publisher:" + journal['jtitle'].lower(),
                      "publisher:" + publisher['name'].lower(),
                      999999999999)
    elif publisher is not None:
        self.xml['header']['outcome'] = 'publisherFound'
        self.xml['header']['numhits'] = '1'
        self.xml['publishers'] = dict()
        self.xml['publishers']['publisher'] = publisher
        self.single_item = True
        self.parsed = True
def record_actions(recid=None, project_id=None, action_name='',
                   action=None, msg='', redirect_url=None):
    uid = current_user.get_id()
    record = get_record(recid)
    if not record:
        abort(404)

    # Either the user is allowed in the project, or is the record owner.
    if project_id:
        project = Project.query.get_or_404(project_id)
        if not project.is_user_allowed():
            abort(401)
    else:
        if uid != int(record.get('owner', {}).get('id', -1)):
            abort(401)

    # Cache actions so they don't get duplicated.
    key = action_key(recid, action_name)
    cache_action = cache.get(key)
    if cache_action == action_name:
        return json_error(400, ' '.join([msg, 'Please wait some minutes.']))

    # Set a 5 min cache to allow bibupload/bibreformat to finish.
    cache.set(key, action_name, timeout=5 * 60)

    r = action(record)
    if r is not None:
        return r

    if redirect_url is None:
        redirect_url = url_for('record.metadata', recid=recid)

    return jsonify({'status': 'ok', 'redirect': redirect_url})
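# A minimal, self-contained sketch of the duplicate-suppression pattern in
# record_actions() above: the first caller stores a marker under a short
# timeout and repeat callers back off. action_key() here is a hypothetical
# stand-in (the real key format may differ), and a plain dict stands in for
# the cache; the real call would be cache.set(key, action_name, timeout=5 * 60).
def action_key(recid, action_name):
    return "record_action:%s:%s" % (recid, action_name)


def guarded_action(store, recid, action_name):
    key = action_key(recid, action_name)
    if store.get(key) == action_name:
        return "busy"       # duplicate request: ask the caller to wait
    store[key] = action_name
    return "accepted"       # first request: marker set, action may run


store = {}
assert guarded_action(store, 1, "publish") == "accepted"
assert guarded_action(store, 1, "publish") == "busy"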
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful, caches the workflow object id that
    corresponds to the uploaded record, so the workflow can be resumed
    when webcoll finishes processing that record.

    If robotupload encountered an error, sends an email informing the
    site administrator about the error.
    """
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records") or dict()
            pending_records[str(recid)] = str(id_object)
            cache.set("pending_records", pending_records)
        else:
            from invenio.config import CFG_SITE_ADMIN_EMAIL
            from invenio.config import CFG_SITE_SUPPORT_EMAIL
            from invenio.ext.email import send_email
            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                CFG_SITE_SUPPORT_EMAIL,
                CFG_SITE_ADMIN_EMAIL,
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
def curate():
    """Index page with uploader and list of existing depositions."""
    from invenio.legacy.search_engine import get_fieldvalues
    action = request.values.get('action')
    community_id = request.values.get('collection')
    recid = request.values.get('recid', 0, type=int)

    # Allowed actions
    if action not in ['accept', 'reject', 'remove']:
        abort(400)

    # Check recid
    if not recid:
        abort(400)
    recid = int(recid)

    # Does the community exist?
    u = Community.query.filter_by(id=community_id).first()
    if not u:
        abort(400)

    # Check permission to perform the action on this record:
    # - accept and reject are done by the community owner
    # - remove is done by the record owner
    if action in ['accept', 'reject']:
        if u.id_user != current_user.get_id():
            abort(403)
    elif action == 'remove':
        try:
            email = get_fieldvalues(recid, '8560_f')[0]
            if email != current_user['email']:
                abort(403)
            # inform interested parties of removing collection/community
            curate_record.send(u, action=action, recid=recid,
                               user=current_user)
        except (IndexError, KeyError):
            abort(403)

    # Prevent double requests (i.e. give bibupload a chance to make
    # the change)
    key = "community_curate:%s_%s" % (community_id, recid)
    cache_action = cache.get(key)
    if cache_action == action or cache_action in ['reject', 'remove']:
        return jsonify({'status': 'success', 'cache': 1})
    elif cache_action:
        # Operation under way, but the same action
        return jsonify({'status': 'failure', 'cache': 1})

    if action == "accept":
        res = u.accept_record(recid)
    elif action == "reject" or action == "remove":
        res = u.reject_record(recid)

    if res:
        # Set a 5 min cache to allow bibupload/webcoll to finish
        cache.set(key, action, timeout=5 * 60)
        return jsonify({'status': 'success', 'cache': 0})
    else:
        return jsonify({'status': 'failure', 'cache': 0})
def get_formatted_holdingpen_object(bwo, date_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return the formatted output, from cache if available."""
    results = cache.get("workflows_holdingpen_{0}".format(bwo.id))
    if results:
        results = msgpack.loads(results)  # reuse the value already fetched
        if results["date"] == bwo.modified.strftime(date_format):
            return results
    results = generate_formatted_holdingpen_object(bwo)
    if results:
        cache.set("workflows_holdingpen_{0}".format(bwo.id),
                  msgpack.dumps(results))
    return results
def parse_url(self, url):
    self.url = url
    # example:
    # url = 'http://www.sherpa.ac.uk/romeo/api29.php?jtitle=Annals%20of%20Physics'
    found_journal = url.find("jtitle=")
    found_publisher = url.find("pub=")
    if found_journal != -1:
        self.search_type = "journal_search"
        # drop the trailing 15-character "&qtype=..." parameter
        self.query = url[found_journal + 7:len(url) - 15]
    elif found_publisher != -1:
        self.search_type = "publisher_search"
        self.query = url[found_publisher + 4:]
    else:
        self.search_type = "issn_search"
        found_issn = url.find("issn=")
        self.query = url[found_issn + 5:]  # skip the "issn=" prefix (5 chars)
    cached_xml = cache.get(self.search_type + ":" + self.query.lower())
    if cached_xml is None:
        try:
            self.data = urllib2.urlopen(url).read()
        except urllib2.HTTPError:
            self.error = True
            return
        try:
            root = ElementTree.XML(self.data)
        except SyntaxError:
            self.error = True
            return
        self.xml = XmlDictConfig(root)
        outcome = self.xml['header']['outcome']
        if outcome != 'failed' and outcome != 'notFound':
            cache.set(self.search_type + ":" + self.query.lower(),
                      self.xml, 999999999999)
    else:
        self.xml = cached_xml
        # self.data = cached_xml
        # root = ElementTree.XML(self.data)
        # self.xml = XmlDictConfig(root)
    if self.xml['header']['outcome'] == 'failed':
        self.error = True
        self.error_message = self.xml['header']['message']
    self.parsed = True
    self._cache_parsed_xml()
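# A minimal, self-contained sketch of the cache-aside pattern parse_url()
# implements: check the cache first, and only on a miss call the remote API
# and store the parsed result. fetch_remote() is a hypothetical stand-in for
# the urllib2 + ElementTree work above, and a plain dict stands in for the cache.
def cached_lookup(store, search_type, query, fetch_remote):
    key = search_type + ":" + query.lower()
    result = store.get(key)
    if result is None:
        result = fetch_remote(query)
        if result is not None:      # only cache successful lookups
            store[key] = result
    return result


store = {}
calls = []

def fetch(query):
    calls.append(query)             # count remote hits
    return {'outcome': 'singleJournal'}

cached_lookup(store, "journal_search", "Annals of Physics", fetch)
cached_lookup(store, "journal_search", "Annals of Physics", fetch)
assert len(calls) == 1              # second call was served from the cache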
def get_formatted_holdingpen_object(bwo, date_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return the formatted output, from cache if available."""
    results = cache.get("workflows_holdingpen_{0}".format(bwo.id))
    if results:
        results = msgpack.loads(results)  # reuse the value already fetched
        if results["date"] == bwo.modified.strftime(date_format):
            return results
    results = generate_formatted_holdingpen_object(bwo)
    if results:
        cache.set("workflows_holdingpen_{0}".format(bwo.id),
                  msgpack.dumps(results),
                  timeout=current_app.config.get(
                      "WORKFLOWS_HOLDING_PEN_CACHE_TIMEOUT"))
    return results
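# A minimal sketch of the staleness check get_formatted_holdingpen_object()
# performs: the cached payload carries the object's modification date and a
# hit only counts if that date still matches. All names here are
# illustrative, not from the source; a plain dict stands in for the cache.
def cached_if_fresh(store, key, modified, render):
    hit = store.get(key)
    if hit is not None and hit["date"] == modified:
        return hit                      # fresh: skip the expensive render
    result = render()
    result["date"] = modified
    store[key] = result
    return result


store = {}
cached_if_fresh(store, "obj:1", "2024-01-01", lambda: {"body": "first"})
out = cached_if_fresh(store, "obj:1", "2024-01-01", lambda: {"body": "second"})
assert out["body"] == "first"           # fresh hit: render() not called again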
def webcoll_callback():
    """Handle a callback from webcoll with the record ids processed.

    Expects the request data to contain a list of record ids in the
    recids field.
    """
    recids = dict(request.form).get('recids', [])
    pending_records = cache.get("pending_records") or dict()
    for rid in recids:
        if rid in pending_records:
            objectid = pending_records[rid]
            workflow_object = BibWorkflowObject.query.get(objectid)
            extra_data = workflow_object.get_extra_data()
            extra_data['url'] = join(CFG_ROBOTUPLOAD_SUBMISSION_BASEURL,
                                     'record', str(rid))
            workflow_object.set_extra_data(extra_data)
            workflow_object.continue_workflow(delayed=True)
            del pending_records[rid]
    cache.set("pending_records", pending_records)
    return jsonify({"result": "success"})
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful, caches the workflow object id that
    corresponds to the uploaded record, so the workflow can be resumed
    when webcoll finishes processing that record.

    If robotupload encountered an error, sends an email informing the
    site administrator about the error.
    """
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records")
            if pending_records:
                pending_records = msgpack.loads(pending_records)
                pending_records[str(recid)] = id_object
                cache.set("pending_records", msgpack.dumps(pending_records))
            else:
                cache.set("pending_records", msgpack.dumps({
                    str(recid): id_object
                }))
        else:
            from invenio.config import CFG_SITE_ADMIN_EMAIL
            from invenio.config import CFG_SITE_SUPPORT_EMAIL
            from invenio.ext.email import send_email
            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                CFG_SITE_SUPPORT_EMAIL,
                CFG_SITE_ADMIN_EMAIL,
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
def webcoll_callback():
    """Handle a callback from webcoll with the record ids processed.

    Expects the request data to contain a list of record ids in the
    recids field.
    """
    recids = dict(request.form).get('recids', [])
    try:
        # cache.get() returns None on a miss, which makes loads() raise
        pending_records = msgpack.loads(cache.get("pending_records"))
    except TypeError:
        pending_records = {}
    for rid in recids:
        if rid in pending_records:
            objectid = pending_records[rid]
            workflow_object = BibWorkflowObject.query.get(objectid)
            extra_data = workflow_object.get_extra_data()
            extra_data['url'] = join(CFG_SITE_URL, 'record', str(rid))
            workflow_object.set_extra_data(extra_data)
            workflow_object.continue_workflow(delayed=True)
            del pending_records[rid]
    cache.set("pending_records", msgpack.dumps(pending_records))
    return jsonify({"result": "success"})
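# A minimal sketch of the msgpack round-trip used by webcoll_callback() and
# the msgpack variant of robotupload_callback(): dicts are packed to bytes
# before cache.set() and unpacked after cache.get(). Depending on the
# msgpack version, raw=False may be needed to get str (not bytes) keys back.
import msgpack

packed = msgpack.dumps({"12345": "workflow-object-42"})
restored = msgpack.loads(packed, raw=False)
assert restored == {"12345": "workflow-object-42"}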
def bst_webcoll_postprocess(recids=[]):
    """POST recids to a remote server to alert that records are visible."""
    if isinstance(recids, str):
        recids = recids.split(",")
    cached_ids = cache.get("webcoll_pending_recids") or []
    recids = recids + cached_ids  # avoid mutating the default argument in place

    if not cfg.get("CFG_WEBCOLL_POST_REQUEST_URL"):
        write_message("CFG_WEBCOLL_POST_REQUEST_URL is not set.")
        task_update_status('ERROR')
        return 1

    if recids:
        write_message("Going to POST callback to {0}: {1} (total: {2})".format(
            cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
            recids[:10],
            len(recids))
        )
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=3)
        session.mount(cfg["CFG_WEBCOLL_POST_REQUEST_URL"], adapter)
        response = session.post(cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
                                data={'recids': recids})
        if response.ok:
            write_message("Post request sent successfully")
            cache.set("webcoll_pending_recids", [])
        else:
            write_message("Post request failed!")
            write_message(response.text)
            task_update_status('ERROR')
            cache.set("webcoll_pending_recids", recids)
        session.close()
    else:
        write_message("No recids to POST callback for to {0}.".format(
            cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
        ))
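# A minimal sketch (the URL is a placeholder, not from the source) of the
# retry-enabled POST in bst_webcoll_postprocess(): mounting an HTTPAdapter
# with max_retries=3 makes requests retry failed connection attempts for
# any URL under the mounted prefix.
import requests

POST_URL = "https://example.org/callback"       # placeholder endpoint

session = requests.Session()
adapter = requests.adapters.HTTPAdapter(max_retries=3)
session.mount(POST_URL, adapter)
try:
    response = session.post(POST_URL, data={'recids': [1, 2, 3]})
    ok = response.ok
except requests.RequestException:
    ok = False                                  # failed even after retries
finally:
    session.close()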
def _precache(self, info, force=False):
    """Calculate permissions for user actions.

    FIXME: compatibility layer only !!!
    """
    CFG_BIBAUTHORID_ENABLED = current_app.config.get(
        'CFG_BIBAUTHORID_ENABLED', False)
    # get authorization key
    acc_key = self.get_acc_key()
    acc = cache.get(acc_key)
    if not force and acc_key is not None and acc is not None:
        return acc

    # FIXME: acc_authorize_action should use flask request directly
    user_info = info
    user_info.update(self.req)

    from invenio.legacy.webuser import isUserSubmitter, isUserReferee, \
        isUserAdmin, isUserSuperAdmin
    from invenio.modules.access.engine import acc_authorize_action
    from invenio.modules.access.control import acc_get_role_id, \
        acc_is_user_in_role
    from invenio.modules.search.utils import \
        get_permitted_restricted_collections

    data = {}
    data['precached_permitted_restricted_collections'] = \
        get_permitted_restricted_collections(user_info)
    data['precached_usebaskets'] = acc_authorize_action(
        user_info, 'usebaskets')[0] == 0
    data['precached_useloans'] = acc_authorize_action(
        user_info, 'useloans')[0] == 0
    data['precached_usegroups'] = acc_authorize_action(
        user_info, 'usegroups')[0] == 0
    data['precached_usealerts'] = acc_authorize_action(
        user_info, 'usealerts')[0] == 0
    data['precached_usemessages'] = acc_authorize_action(
        user_info, 'usemessages')[0] == 0
    data['precached_usestats'] = acc_authorize_action(
        user_info, 'runwebstatadmin')[0] == 0
    try:
        data['precached_viewsubmissions'] = isUserSubmitter(user_info)
    except Exception:
        data['precached_viewsubmissions'] = None
    data['precached_useapprove'] = isUserReferee(user_info)
    data['precached_useadmin'] = isUserAdmin(user_info)
    data['precached_usesuperadmin'] = isUserSuperAdmin(user_info)
    data['precached_canseehiddenmarctags'] = acc_authorize_action(
        user_info, 'runbibedit')[0] == 0

    usepaperclaim = False
    usepaperattribution = False
    viewclaimlink = False

    if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
            user_info, acc_get_role_id("paperclaimviewers"))):
        usepaperclaim = True

    if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
            user_info, acc_get_role_id("paperattributionviewers"))):
        usepaperattribution = True

    viewlink = False
    try:
        viewlink = session['personinfo']['claim_in_process']
    except (KeyError, TypeError):
        pass

    if (current_app.config.get('CFG_BIBAUTHORID_ENABLED') and
            usepaperattribution and viewlink):
        viewclaimlink = True

    # if (CFG_BIBAUTHORID_ENABLED
    #     and ((usepaperclaim or usepaperattribution)
    #     and acc_is_user_in_role(
    #         data, acc_get_role_id("paperattributionlinkviewers")))):
    #     viewclaimlink = True

    data['precached_viewclaimlink'] = viewclaimlink
    data['precached_usepaperclaim'] = usepaperclaim
    data['precached_usepaperattribution'] = usepaperattribution

    timeout = current_app.config.get(
        'CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT', 0) * 3600
    cache.set(acc_key, data, timeout=timeout)
    return data
def save_in_storage(self, sid, session_object, timeout, uid):  # pylint: disable=W0613
    return cache.set(self.generate_key(sid), session_object,
                     timeout=timeout)
def _update_lastrun(obj, eng):
    # `key` comes from the enclosing scope of this workflow task.
    cache.set(key, datetime.now().strftime('%Y-%m-%d'),
              timeout=999999999999)
def store_weights_cache(weights):
    """Store citation weights in the key/value store."""
    cache.set('citations_weights', serialize_via_marshal(weights))
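# serialize_via_marshal() is not shown in these snippets; a plausible
# minimal reading (an assumption, not the source implementation) is a thin
# wrapper around Python's marshal module, possibly with compression on top.
import marshal

def serialize_via_marshal(obj):
    # Hypothetical stand-in; the real helper may also compress the payload.
    return marshal.dumps(obj)

def deserialize_via_marshal(blob):
    return marshal.loads(blob)

weights = {1: 0.5, 2: 1.25}
assert deserialize_via_marshal(serialize_via_marshal(weights)) == weights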
def _set(self, data):
    self._keys = self._keys | set([data['_id']])
    cache.set(self._prefix + data['_id'], data, timeout=99999)
def store_weights_cache(weights):
    """Store self-citation weights in the key/value store."""
    cache.set('selfcites_weights', serialize_via_marshal(weights))
def _keys(self, value):
    cache.set(self._prefix + '::keys', value)
def _precache(self, info, force=False):
    """Calculate permissions for user actions.

    FIXME: compatibility layer only !!!
    """
    CFG_BIBAUTHORID_ENABLED = current_app.config.get(
        'CFG_BIBAUTHORID_ENABLED', False)
    # get authorization key
    acc_key = self.get_acc_key()
    acc = cache.get(acc_key)
    if not force and acc_key is not None and acc is not None:
        return acc

    # FIXME: acc_authorize_action should use flask request directly
    user_info = info
    user_info.update(self.req)

    from invenio.legacy.webuser import isUserSubmitter, isUserReferee, \
        isUserAdmin, isUserSuperAdmin
    from invenio.modules.access.engine import acc_authorize_action
    from invenio.modules.access.control import acc_get_role_id, \
        acc_is_user_in_role
    from invenio_search.utils import \
        get_permitted_restricted_collections
    from invenio_deposit.cache import \
        get_authorized_deposition_types

    data = {}
    data['precached_permitted_restricted_collections'] = \
        get_permitted_restricted_collections(user_info)
    data['precached_allowed_deposition_types'] = \
        get_authorized_deposition_types(user_info)
    data['precached_useloans'] = acc_authorize_action(
        user_info, 'useloans')[0] == 0
    data['precached_usegroups'] = acc_authorize_action(
        user_info, 'usegroups')[0] == 0
    data['precached_usemessages'] = acc_authorize_action(
        user_info, 'usemessages')[0] == 0
    try:
        data['precached_viewsubmissions'] = isUserSubmitter(user_info)
    except Exception:
        data['precached_viewsubmissions'] = None
    data['precached_useapprove'] = isUserReferee(user_info)
    data['precached_useadmin'] = isUserAdmin(user_info)
    data['precached_usesuperadmin'] = isUserSuperAdmin(user_info)
    data['precached_canseehiddenmarctags'] = acc_authorize_action(
        user_info, 'runbibedit')[0] == 0

    usepaperclaim = False
    usepaperattribution = False
    viewclaimlink = False

    if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
            user_info, acc_get_role_id("paperclaimviewers"))):
        usepaperclaim = True

    if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
            user_info, acc_get_role_id("paperattributionviewers"))):
        usepaperattribution = True

    viewlink = False
    try:
        viewlink = session['personinfo']['claim_in_process']
    except (KeyError, TypeError):
        pass

    if (current_app.config.get('CFG_BIBAUTHORID_ENABLED') and
            usepaperattribution and viewlink):
        viewclaimlink = True

    # if (CFG_BIBAUTHORID_ENABLED
    #     and ((usepaperclaim or usepaperattribution)
    #     and acc_is_user_in_role(
    #         data, acc_get_role_id("paperattributionlinkviewers")))):
    #     viewclaimlink = True

    data['precached_viewclaimlink'] = viewclaimlink
    data['precached_usepaperclaim'] = usepaperclaim
    data['precached_usepaperattribution'] = usepaperattribution

    timeout = current_app.config.get(
        'CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT', 0) * 3600
    cache.set(acc_key, data, timeout=timeout)
    return data
def _set(self, data):
    cache.set(self._prefix + data['_id'], data, timeout=99999)
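# A minimal, self-contained sketch tying together the _set()/_keys() pieces
# above: a cache-backed store that tracks its own key set so entries can be
# enumerated later (plain key/value caches cannot list their keys). The
# class names and the all_ids() helper are assumptions, not from the source.
class _DictCache(object):
    """In-memory stand-in for the cache object (timeout is ignored)."""

    def __init__(self):
        self._d = {}

    def get(self, key):
        return self._d.get(key)

    def set(self, key, value, timeout=None):
        self._d[key] = value


class CacheBackedStore(object):
    def __init__(self, cache, prefix):
        self._cache = cache
        self._prefix = prefix
        self._keys = set()

    def set(self, data):
        self._keys = self._keys | set([data['_id']])
        self._cache.set(self._prefix + data['_id'], data, timeout=99999)

    def get(self, _id):
        return self._cache.get(self._prefix + _id)

    def all_ids(self):
        return sorted(self._keys)


store = CacheBackedStore(_DictCache(), "records:")
store.set({'_id': '42', 'title': 'demo'})
assert store.get('42')['title'] == 'demo'
assert store.all_ids() == ['42']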