Example #1
        def fill():
            alldicts = {}
            from invenio.legacy.bibrank.tag_based_indexer import fromDB
            serialized_weights = cache.get('citations_weights')
            if serialized_weights:
                weights = deserialize_via_marshal(serialized_weights)
            else:
                weights = fromDB('citation')

            alldicts['citations_weights'] = weights
            # for cited:M->N queries it is useful to also cache a
            # preprocessed version of the citation dict:
            alldicts['citations_keys'] = intbitset(weights.keys())

            # Citation counts
            alldicts['citations_counts'] = [t for t in iteritems(weights)]
            alldicts['citations_counts'].sort(key=itemgetter(1), reverse=True)

            # Self-cites
            serialized_weights = cache.get('selfcites_weights')
            if serialized_weights:
                selfcites = deserialize_via_marshal(serialized_weights)
            else:
                selfcites = fromDB('selfcites')
            selfcites_weights = {}
            for recid, counts in alldicts['citations_counts']:
                selfcites_weights[recid] = counts - selfcites.get(recid, 0)
            alldicts['selfcites_weights'] = selfcites_weights
            alldicts['selfcites_counts'] = [(recid, selfcites_weights.get(recid, cites)) for recid, cites in alldicts['citations_counts']]
            alldicts['selfcites_counts'].sort(key=itemgetter(1), reverse=True)

            return alldicts
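The fill() above is a cache-or-compute read: try the serialized copy under a
known key, deserialize on a hit, and fall back to recomputing from the
database on a miss. A minimal sketch of that pattern, with a hypothetical
helper name and plain marshal standing in for Invenio's serializers:

import marshal

def cached_or_compute(cache, key, compute):
    # Serve the serialized copy if present; otherwise recompute.
    serialized = cache.get(key)
    if serialized is not None:
        return marshal.loads(serialized)
    return compute()

# e.g. weights = cached_or_compute(cache, 'citations_weights',
#                                  lambda: fromDB('citation'))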
Example #3
    def get_publishers(
        self,
        attribute=None,
        journal=None,
    ):
        """ Returns a list of the publishers if a publisher search was made or
        an empty list.

        If a journal is defined, it returns the associated publisher
        for this Journal or None. A journal definition makes the functions
        to query again if the publisher isn't found in the xml or cache.
        Note: If you define a journal, you must have searched for it first.

        If an attribute is defined, returns only this attribute from
        every publisher
        """

        if self.xml['header']['outcome'] == 'notFound' \
           or self.xml['header']['outcome'] == 'failed':
            return None

        if self.xml['header']['outcome'] == 'singleJournal':
            return self.xml['publishers']['publisher']

        if self.xml['header']['outcome'] == 'uniqueZetoc':
            # the Publisher has not yet been indexed by RoMEO
            return None

        if journal is not None:
            #  search the cache for matches
            publisher_key = cache.get("journal-publisher:" + journal.lower())
            if publisher_key is not None:
                return cache.get(publisher_key)

            # Query the SHERPA/RoMEO DB again to get the publisher
            s = SherpaRomeoSearch()
            issn = self.get_journals(attribute='issn')[0]
            if issn is not None:
                s.search_issn(issn)
                return s.parser.get_publishers()
            else:
                return None

        publishers = list()
        if self.xml['header']['outcome'] == 'publisherFound':
            if self.xml['header']['numhits'] == '1':
                p = self.xml['publishers']['publisher']
                if attribute is None:
                    publishers.append(p)
                else:
                    if p[attribute] is None:
                        return []
                    publishers.append(p[attribute])
            else:
                for p in self.xml['publishers']['publisher']:
                    if attribute is None:
                        publishers.append(p)
                    else:
                        publishers.append(p[attribute])
        return publishers
Example #4
    def get_publishers(self, attribute=None, journal=None,):
        """ Returns a list of the publishers if a publisher search was made or
        an empty list.

        If a journal is defined, it returns the associated publisher
        for this Journal or None. A journal definition makes the functions
        to query again if the publisher isn't found in the xml or cache.
        Note: If you define a journal, you must have searched for it first.

        If an attribute is defined, returns only this attribute from
        every publisher
        """

        if self.xml['header']['outcome'] == 'notFound' \
           or self.xml['header']['outcome'] == 'failed':
            return None

        if self.xml['header']['outcome'] == 'singleJournal':
            return self.xml['publishers']['publisher']

        if self.xml['header']['outcome'] == 'uniqueZetoc':
            # the Publisher has not yet been indexed by RoMEO
            return None

        if journal is not None:
            #  search the cache for matches
            publisher_key = cache.get("journal-publisher:" + journal.lower())
            if publisher_key is not None:
                return cache.get(publisher_key)

            # Query the SHERPA/RoMEO DB again to get the publisher
            s = SherpaRomeoSearch()
            issn = self.get_journals(attribute='issn')[0]
            if issn is not None:
                s.search_issn(issn)
                return s.parser.get_publishers()
            else:
                return None

        publishers = list()
        if self.xml['header']['outcome'] == 'publisherFound':
            if self.xml['header']['numhits'] == '1':
                p = self.xml['publishers']['publisher']
                if attribute is None:
                    publishers.append(p)
                else:
                    if p[attribute] is None:
                        return []
                    publishers.append(p[attribute])
            else:
                for p in self.xml['publishers']['publisher']:
                    if attribute is None:
                        publishers.append(p)
                    else:
                        publishers.append(p[attribute])
        return publishers
Example #5
 def _sorter(item):
     try:
         results = cache.get("workflows_holdingpen_{0}".format(item.id))
         if results:
             return msgpack.loads(results)[name]
     except Exception:
         current_app.logger.exception(
             "Invalid format for object {0}: {1}".format(
                 item.id,
                 cache.get("workflows_holdingpen_{0}".format(item.id))))
Example #6
def get_formatted_holdingpen_object(bwo, date_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return the formatted output, from cache if available."""
    results = cache.get("workflows_holdingpen_{0}".format(bwo.id))
    if results:
        results = msgpack.loads(results)
        if results["date"] == bwo.modified.strftime(date_format):
            return results
    results = generate_formatted_holdingpen_object(bwo)
    if results:
        cache.set("workflows_holdingpen_{0}".format(bwo.id), msgpack.dumps(results))
    return results
Example #8
 def _sorter(item):
     try:
         results = cache.get("workflows_holdingpen_{0}".format(item.id))
         if results:
             return msgpack.loads(results)[name]
     except Exception:
         current_app.logger.exception(
             "Invalid format for object {0}: {1}".format(
                 item.id,
                 cache.get("workflows_holdingpen_{0}".format(item.id))
             )
         )
Example #9
def get_formatted_holdingpen_object(bwo, date_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return the formatted output, from cache if available."""
    results = cache.get("workflows_holdingpen_{0}".format(bwo.id))
    if results:
        results = msgpack.loads(results)
        if results["date"] == bwo.modified.strftime(date_format):
            return results
    results = generate_formatted_holdingpen_object(bwo)
    if results:
        cache.set("workflows_holdingpen_{0}".format(bwo.id),
                  msgpack.dumps(results),
                  timeout=current_app.config.get(
                      "WORKFLOWS_HOLDING_PEN_CACHE_TIMEOUT"))
    return results
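Each variant of get_formatted_holdingpen_object() validates the cached entry
against the object's modification time before serving it. A distilled sketch
of that read-through pattern; the helper name and the render callback are
hypothetical, while the 'date' field and the msgpack framing mirror the
examples above:

import msgpack

def cached_or_rebuild(cache, key, obj, render,
                      date_format='%Y-%m-%d %H:%M:%S.%f'):
    # Serve the cached entry only while its date stamp still matches the
    # object's modification time; otherwise rebuild and re-cache it.
    raw = cache.get(key)
    if raw:
        entry = msgpack.loads(raw)
        if entry["date"] == obj.modified.strftime(date_format):
            return entry
    entry = render(obj)
    if entry:
        cache.set(key, msgpack.dumps(entry))
    return entry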
Example #10
    def _login(self, uid, force=False):
        """Get account information about currently logged user from database.

        Should raise an exception when session.uid is not valid User.id.
        """
        data = cache.get(self.get_key())
        if not force and data is not None:
            return data

        from invenio_accounts.models import User
        data = {}

        try:
            user = User.query.get(uid)
            data['id'] = data['uid'] = user.id or -1
            data['nickname'] = user.nickname or ''
            data['given_names'] = user.given_names or ''
            data['family_name'] = user.family_name or ''
            data['email'] = user.email or ''
            data['note'] = user.note or ''
            data['group'] = map(lambda x: x.group.name,
                                user.groups or [])
            data.update(user.settings or {})
            data['settings'] = user.settings or {}
            data['guest'] = str(int(user.guest))  # '1' or '0'
            self.modified = True
        except Exception:
            data = self._create_guest()

        return data
Example #11
    def _login(self, uid, force=False):
        """Get account information about currently logged user from database.

        Should raise an exception when session.uid is not valid User.id.
        """
        data = cache.get(self.get_key())
        if not force and data is not None:
            return data

        from invenio.modules.accounts.models import User
        data = {}

        try:
            user = User.query.get(uid)
            data['id'] = data['uid'] = user.id or -1
            data['nickname'] = user.nickname or ''
            data['given_names'] = user.given_names or ''
            data['family_name'] = user.family_name or ''
            data['email'] = user.email or ''
            data['note'] = user.note or ''
            data['group'] = map(lambda x: x.usergroup.name,
                                user.usergroups or [])
            data.update(user.settings or {})
            data['settings'] = user.settings or {}
            data['guest'] = str(int(user.guest))  # '1' or '0'
            self.modified = True
        except Exception:
            data = self._create_guest()

        return data
Example #12
def record_actions(recid=None, project_id=None, action_name='',
                   action=None, msg='', redirect_url=None):
    uid = current_user.get_id()
    record = get_record(recid)
    if not record:
        abort(404)

    # either the user is allowed in the project
    # or they are the owner
    if project_id:
        project = Project.query.get_or_404(project_id)
        if not project.is_user_allowed():
            abort(401)
    else:
        if uid != int(record.get('owner', {}).get('id', -1)):
            abort(401)

    # crazy invenio stuff, cache actions so they don't get duplicated
    key = action_key(recid, action_name)
    cache_action = cache.get(key)
    if cache_action == action_name:
        return json_error(400, ' '.join([msg, 'Please wait some minutes.']))
    # Set 5 min cache to allow bibupload/bibreformat to finish
    cache.set(key, action_name, timeout=5 * 60)

    r = action(record)
    if r is not None:
        return r

    if redirect_url is None:
        redirect_url = url_for('record.metadata', recid=recid)
    return jsonify({'status': 'ok', 'redirect': redirect_url})
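The five-minute cache entry in record_actions() doubles as a debounce lock:
as long as the key still holds the action name, repeated requests are
rejected. The same idea as a standalone sketch (the helper name is
hypothetical):

def debounce(cache, key, token, timeout=5 * 60):
    # Refuse if the same token was recorded recently; otherwise record it
    # for `timeout` seconds and let the caller proceed.
    if cache.get(key) == token:
        return False
    cache.set(key, token, timeout=timeout)
    return True

record_actions() above would call debounce(cache, action_key(recid,
action_name), action_name) and return the error response whenever it
yields False.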
Example #13
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful caches the workflow
    object id that corresponds to the uploaded record,
    so the workflow can be resumed when webcoll finish
    processing that record.
    If robotupload encountered an error sends an email
    to site administrator informing him about the error."""
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records") or dict()
            pending_records[str(recid)] = str(id_object)
            cache.set("pending_records", pending_records,
                      timeout=cfg["PENDING_RECORDS_CACHE_TIMEOUT"])
        else:
            from invenio.ext.email import send_email

            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                cfg["CFG_SITE_SUPPORT_EMAIL"],
                cfg["CFG_SITE_ADMIN_EMAIL"],
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
Example #14
def curate():
    """Index page with uploader and list of existing depositions."""
    from invenio.legacy.search_engine import get_fieldvalues
    action = request.values.get('action')
    community_id = request.values.get('collection')
    recid = request.values.get('recid', 0, type=int)
    # Allowed actions
    if action not in ['accept', 'reject', 'remove']:
        abort(400)

    # Check recid
    if not recid:
        abort(400)
    recid = int(recid)

    # Does the community exist?
    u = Community.query.filter_by(id=community_id).first()
    if not u:
        abort(400)
    # Check permission to perform action on this record
    # - Accept and reject is done by community owner
    # - Remove  is done by record owner
    if action in ['accept', 'reject']:
        if u.id_user != current_user.get_id():
            abort(403)
    elif action == 'remove':
        try:
            email = get_fieldvalues(recid, '8560_f')[0]
            if email != current_user['email']:
                abort(403)
            # inform interested parties of removing collection/community
            curate_record.send(u,
                               action=action,
                               recid=recid,
                               user=current_user)
        except (IndexError, KeyError):
            abort(403)

    # Prevent double requests (i.e. give bibupload a chance to make the change)
    key = "community_curate:%s_%s" % (community_id, recid)
    cache_action = cache.get(key)
    if cache_action == action or cache_action in ['reject', 'remove']:
        return jsonify({'status': 'success', 'cache': 1})
    elif cache_action:
        # Operation under way, but with a different action
        return jsonify({'status': 'failure', 'cache': 1})

    if action == "accept":
        res = u.accept_record(recid)
    elif action == "reject" or action == "remove":
        res = u.reject_record(recid)
    if res:
        # Set 5 min cache to allow bibupload/webcoll to finish
        cache.set(key, action, timeout=5 * 60)
        return jsonify({'status': 'success', 'cache': 0})
    else:
        return jsonify({'status': 'failure', 'cache': 0})
Example #15
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful caches the workflow
    object id that corresponds to the uploaded record,
    so the workflow can be resumed when webcoll finish
    processing that record.
    If robotupload encountered an error sends an email
    to site administrator informing him about the error."""
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records") or dict()
            pending_records[str(recid)] = str(id_object)
            cache.set("pending_records", pending_records)
        else:
            from invenio.config import CFG_SITE_ADMIN_EMAIL
            from invenio.ext.email import send_email
            from invenio.config import CFG_SITE_SUPPORT_EMAIL

            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                CFG_SITE_SUPPORT_EMAIL,
                CFG_SITE_ADMIN_EMAIL,
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
Example #16
def get_formatted_holdingpen_object(bwo, date_format='%Y-%m-%d %H:%M:%S.%f'):
    """Return the formatted output, from cache if available."""
    results = cache.get("workflows_holdingpen_{0}".format(bwo.id))
    if results:
        results = msgpack.loads(results)
        if results["date"] == bwo.modified.strftime(date_format):
            return results
    results = generate_formatted_holdingpen_object(bwo)
    if results:
        cache.set("workflows_holdingpen_{0}".format(bwo.id),
                  msgpack.dumps(results),
                  timeout=current_app.config.get(
                      "WORKFLOWS_HOLDING_PEN_CACHE_TIMEOUT"
                  ))
    return results
Example #17
def curate():
    """
    Index page with uploader and list of existing depositions
    """
    from invenio.legacy.search_engine import get_fieldvalues
    action = request.values.get('action')
    community_id = request.values.get('collection')
    recid = request.values.get('recid', 0, type=int)
    # Allowed actions
    if action not in ['accept', 'reject', 'remove']:
        abort(400)

    # Check recid
    if not recid:
        abort(400)
    recid = int(recid)

    # Does the community exist?
    u = Community.query.filter_by(id=community_id).first()
    if not u:
        abort(400)
    # Check permission to perform action on this record
    # - Accept and reject is done by community owner
    # - Remove  is done by record owner
    if action in ['accept', 'reject']:
        if u.id_user != current_user.get_id():
            abort(403)
    elif action == 'remove':
        try:
            email = get_fieldvalues(recid, '8560_f')[0]
            if email != current_user['email']:
                abort(403)
            # inform interested parties of removing collection/community
            curate_record.send(u, action=action, recid=recid, user=current_user)
        except (IndexError, KeyError):
            abort(403)

    # Prevent double requests (i.e. give bibupload a chance to make the change)
    key = "community_curate:%s_%s" % (community_id, recid)
    cache_action = cache.get(key)
    if cache_action == action or cache_action in ['reject', 'remove']:
        return jsonify({'status': 'success', 'cache': 1})
    elif cache_action:
        # Operation under way, but with a different action
        return jsonify({'status': 'failure', 'cache': 1})

    if action == "accept":
        res = u.accept_record(recid)
    elif action == "reject" or action == "remove":
        res = u.reject_record(recid)
    if res:
        # Set 5 min cache to allow bibupload/webcoll to finish
        cache.set(key, action, timeout=5*60)
        return jsonify({'status': 'success', 'cache': 0})
    else:
        return jsonify({'status': 'failure', 'cache': 0})
Example #18
def cached_record_action(record, action_name):
    """Determine if a given action is underway."""
    cache_action = cache.get(action_key(record['recid'], action_name))
    if cache_action == action_name:
        return True
    attrs_for_actions = {
        'curate': 'record_curated_in_project',
        'publish': 'record_publish_from_project',
        'archive': 'record_selected_for_archive',
        'doi': 'doi',
    }
    return record.get(attrs_for_actions[action_name], False)
Example #19
 def _sorter(item):
     try:
         cached_results = get_formatted_holdingpen_object(item)
         if from_data:
             # Get value from sort_data
             return cached_results.get("sort_data", {}).get(name)
         else:
             return cached_results.get(name)
     except Exception:
         current_app.logger.exception(
             "Invalid format for object {0}: {1}".format(
                 item.id,
                 cache.get("workflows_holdingpen_{0}".format(item.id))))
Example #20
    def parse_url(self, url):
        self.url = url
        # example:
        # url = 'http://www.sherpa.ac.uk/romeo/api29.php?jtitle=Annals%20of%20Physics'

        found_journal = url.find("jtitle=")
        found_publisher = url.find("pub=")
        if found_journal != -1:
            self.search_type = "journal_search"
            # strip the leading 'jtitle=' and the trailing '&qtype=...'
            self.query = url[found_journal + 7:(len(url) - 15)]
        elif found_publisher != -1:
            self.search_type = "publisher_search"
            self.query = url[found_publisher + 4:len(url)]
        else:
            self.search_type = "issn_search"
            found_issn = url.find("issn=")
            # "issn=" is five characters long, not four
            self.query = url[found_issn + 5:len(url)]

        cached_xml = cache.get(self.search_type + ":" + self.query.lower())
        if cached_xml is None:
            try:
                self.data = urllib2.urlopen(url).read()
            except urllib2.HTTPError:
                self.error = True
                return
            try:
                root = ElementTree.XML(self.data)
            except SyntaxError:
                self.error = True
                return
            self.xml = XmlDictConfig(root)
            outcome = self.xml['header']['outcome']
            if outcome != 'failed' and outcome != 'notFound':
                cache.set(self.search_type + ":" + self.query.lower(),
                          self.xml, 999999999999)
        else:
            self.xml = cached_xml
            #self.data = cached_xml
            #root = ElementTree.XML(self.data)
            #self.xml = XmlDictConfig(root)

        if self.xml['header']['outcome'] == 'failed':
            self.error = True
            self.error_message = self.xml['header']['message']
        self.parsed = True
        self._cache_parsed_xml()
Example #21
def remove_session(user_id):
    """Remove session for a user."""
    prefix = cache.cache.key_prefix + "session::"

    for k in cache.cache._client.keys():
        if k.startswith(prefix):
            k = k[len(cache.cache.key_prefix):]
            try:
                data = Serializer.loads(cache.get(k))
                if data['uid'] == user_id:
                    print k
                    cache.delete(k)
            except TypeError:
                pass
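remove_session() reaches through cache.cache._client, i.e. it assumes a
Redis-backed cache and enumerates keys with KEYS. A sketch of the same sweep
written against redis-py directly, under that assumption; scan_iter replaces
the blocking KEYS call, and the function and parameter names are
hypothetical:

def remove_sessions_for(client, prefix, user_id, loads):
    # Walk every session key under `prefix` and drop those belonging to
    # `user_id`; scan_iter iterates incrementally instead of blocking the
    # server the way KEYS does.
    for key in client.scan_iter(match=prefix + "*"):
        try:
            data = loads(client.get(key))
        except TypeError:
            continue
        if data.get('uid') == user_id:
            client.delete(key)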
Example #22
    def _convert_files(obj, eng):
        from invenio_knowledge.api import get_kb_mappings
        mappings = dict(
            map(
                lambda item: (item['key'], item['value']),
                get_kb_mappings('JOURNALS')
            )
        )
        ws = WorldScientific(mappings)

        target_folder_full = get_storage_path(suffix=target_folder)

        args = obj.extra_data['args']
        # By default, we set the to date to today
        to_date = args.get("to_date") or datetime.now().strftime('%Y-%m-%d')

        # As a last resort, we set the from date to a week ago
        from_date = args.get("from_date") or cache.get(date_key) \
            or (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')

        obj.extra_data['args']["to_date"] = to_date
        obj.extra_data['args']["from_date"] = from_date

        insert_files = []
        filenames = obj.data['extracted_files']
        for filename in filenames:
            date = ws.get_date(filename)
            if from_date <= date <= to_date:
                marc = ws.get_record(filename)
                if marc:
                    filename = basename(filename)
                    filename = join(target_folder_full, filename)
                    insert_files.append(filename)
                    with open(filename, 'w') as outfile:
                        outfile.write(marc)

        obj.log.info("Converted {0} articles between {1} to {2}".format(
            len(insert_files),
            from_date,
            to_date
        ))

        obj.data['insert'] = insert_files
        obj.data["result_path"] = target_folder_full

        obj.log.debug("Saved converted files to {0}".format(target_folder_full))
        obj.log.debug("{0} files to add".format(
            len(obj.data["insert"]),
        ))
Example #23
 def _sorter(item):
     try:
         cached_results = get_formatted_holdingpen_object(item)
         if from_data:
             # Get value from sort_data
             return cached_results.get("sort_data", {}).get(name)
         else:
             return cached_results.get(name)
     except Exception:
         current_app.logger.exception(
             "Invalid format for object {0}: {1}".format(
                 item.id,
                 cache.get("workflows_holdingpen_{0}".format(item.id))
             )
         )
Example #24
    def parse_url(self, url):
        self.url = url
        # example:
        # url = 'http://www.sherpa.ac.uk/romeo/api29.php?jtitle=Annals%20of%20Physics'

        found_journal = url.find("jtitle=")
        found_publisher = url.find("pub=")
        if found_journal != -1:
            self.search_type = "journal_search"
            # strip the leading 'jtitle=' and the trailing '&qtype=...'
            self.query = url[found_journal + 7:(len(url) - 15)]
        elif found_publisher != -1:
            self.search_type = "publisher_search"
            self.query = url[found_publisher + 4:len(url)]
        else:
            self.search_type = "issn_search"
            found_issn = url.find("issn=")
            # "issn=" is five characters long, not four
            self.query = url[found_issn + 5:len(url)]

        cached_xml = cache.get(self.search_type + ":" + self.query.lower())
        if cached_xml is None:
            try:
                self.data = urllib2.urlopen(url).read()
            except urllib2.HTTPError:
                self.error = True
                return
            try:
                root = ElementTree.XML(self.data)
            except SyntaxError:
                self.error = True
                return
            self.xml = XmlDictConfig(root)
            outcome = self.xml['header']['outcome']
            if outcome != 'failed' and outcome != 'notFound':
                cache.set(self.search_type + ":" + self.query.lower(),
                          self.xml, 999999999999)
        else:
            self.xml = cached_xml
            #self.data = cached_xml
            #root = ElementTree.XML(self.data)
            #self.xml = XmlDictConfig(root)

        if self.xml['header']['outcome'] == 'failed':
            self.error = True
            self.error_message = self.xml['header']['message']
        self.parsed = True
        self._cache_parsed_xml()
Example #25
def webcoll_callback():
    """Handle a callback from webcoll with the record ids processed.

    Expects the request data to contain a list of record ids in the
    recids field.
    """
    recids = dict(request.form).get('recids', [])
    pending_records = cache.get("pending_records") or dict()
    for rid in recids:
        if rid in pending_records:
            objectid = pending_records[rid]
            workflow_object = BibWorkflowObject.query.get(objectid)
            extra_data = workflow_object.get_extra_data()
            extra_data['url'] = join(CFG_ROBOTUPLOAD_SUBMISSION_BASEURL, 'record', str(rid))
            workflow_object.set_extra_data(extra_data)
            workflow_object.continue_workflow(delayed=True)
            del pending_records[rid]
            cache.set("pending_records", pending_records)
    return jsonify({"result": "success"})
Example #26
def robotupload_callback():
    """Handle callback from robotupload.

    If robotupload was successful caches the workflow
    object id that corresponds to the uploaded record,
    so the workflow can be resumed when webcoll finish
    processing that record.
    If robotupload encountered an error sends an email
    to site administrator informing him about the error."""
    request_data = request.get_json()
    id_object = request_data.get("nonce", "")
    results = request_data.get("results", [])
    status = False
    for result in results:
        status = result.get('success', False)
        if status:
            recid = result.get('recid')
            pending_records = cache.get("pending_records")
            if pending_records:
                pending_records = msgpack.loads(pending_records)
                pending_records[str(recid)] = id_object
                cache.set("pending_records", msgpack.dumps(pending_records))
            else:
                cache.set("pending_records", msgpack.dumps({
                    str(recid): id_object
                }))
        else:
            from invenio.config import CFG_SITE_ADMIN_EMAIL
            from invenio.ext.email import send_email
            from invenio.config import CFG_SITE_SUPPORT_EMAIL

            body = ("There was an error when uploading the "
                    "submission with id: %s.\n" % id_object)
            body += "Error message:\n"
            body += result.get('error_message', '')
            send_email(
                CFG_SITE_SUPPORT_EMAIL,
                CFG_SITE_ADMIN_EMAIL,
                'BATCHUPLOAD ERROR',
                body
            )
    return jsonify({"result": status})
Example #27
    def search_publisher(self, query):
        """
        Search for Publishers
        query: the query to be made

        returns a list with publisher names
        """

        # Search first for exact matches in cache
        cached_publisher = cache.get("publisher:" + query.lower())
        if cached_publisher is not None:
            self.parser.set_single_item(publisher=cached_publisher)
            return cached_publisher['name']

        cleanquery = query.replace(" ", "+")
        url = "http://www.sherpa.ac.uk/romeo/api29.php?pub=" + cleanquery
        self.parser.parse_url(url)
        self.error = self.parser.error
        self.error_message = self.parser.error_message
        if not self.error:
            return self.parser.get_publishers(attribute='name')
Example #28
    def search_publisher(self, query):
        """
        Search for Publishers
        query: the query to be made

        returns a list with publisher names
        """

        # Search first for exact matches in cache
        cached_publisher = cache.get("publisher:" + query.lower())
        if cached_publisher is not None:
            self.parser.set_single_item(publisher=cached_publisher)
            return cached_publisher['name']

        cleanquery = query.replace(" ", "+")
        url = "http://www.sherpa.ac.uk/romeo/api29.php?pub=" + cleanquery
        self.parser.parse_url(url)
        self.error = self.parser.error
        self.error_message = self.parser.error_message
        if not self.error:
            return self.parser.get_publishers(attribute='name')
Example #29
    def search_journal(self, query, query_type='contains'):
        """
        Search for Journals
        query: the query to be made
        query_type: it must be 'contains'(default), 'exact' or 'start'

        returns a list with the specific journal titles or empty list
        """

        if query_type == 'exact':
            # Search first for exact matches in cache
            cached_journal = cache.get("journal:" + query.lower())
            if cached_journal is not None:
                self.parser.set_single_item(journal=cached_journal)
                return cached_journal['jtitle']

        cleanquery = query.replace(" ", "+")
        url = "http://www.sherpa.ac.uk/romeo/api29.php?jtitle=" + cleanquery + "&qtype=" + query_type
        self.parser.parse_url(url)
        self.error = self.parser.error
        self.error_message = self.parser.error_message
        if not self.error:
            return self.parser.get_journals(attribute='jtitle')
Example #31
def webcoll_callback():
    """Handle a callback from webcoll with the record ids processed.

    Expects the request data to contain a list of record ids in the
    recids field.
    """
    recids = dict(request.form).get('recids', [])
    try:
        pending_records = msgpack.loads(cache.get("pending_records"))
    except TypeError:
        pending_records = {}
    if pending_records:
        for rid in recids:
            if rid in pending_records:
                objectid = pending_records[rid]
                workflow_object = BibWorkflowObject.query.get(objectid)
                extra_data = workflow_object.get_extra_data()
                extra_data['url'] = join(CFG_SITE_URL, 'record', str(rid))
                workflow_object.set_extra_data(extra_data)
                workflow_object.continue_workflow(delayed=True)
                del pending_records[rid]
                cache.set("pending_records", msgpack.dumps(pending_records))
    return jsonify({"result": "success"})
Example #32
def bst_webcoll_postprocess(recids=[]):
    """Parse recids to POST to remote server to alert that records are visible."""
    if isinstance(recids, str):
        recids = recids.split(",")
    cached_ids = cache.get("webcoll_pending_recids") or []
    recids += cached_ids

    if not cfg.get("CFG_WEBCOLL_POST_REQUEST_URL"):
        write_message("CFG_WEBCOLL_POST_REQUEST_URL is not set.")
        task_update_status('ERROR')
        return 1

    if recids:
        write_message("Going to POST callback to {0}: {1} (total: {2})".format(
            cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
            recids[:10],
            len(recids))
        )
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=3)
        session.mount(cfg["CFG_WEBCOLL_POST_REQUEST_URL"], adapter)
        response = session.post(cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
                                data={'recids': recids})
        if response.ok:
            write_message("Post request sent successfully")
            cache.set("webcoll_pending_recids", [])
        else:
            write_message("Post request failed!")
            write_message(response.text)
            task_update_status('ERROR')
            cache.set("webcoll_pending_recids", recids)
        session.close()
    else:
        write_message("No recids to POST callback for to {0}.".format(
            cfg["CFG_WEBCOLL_POST_REQUEST_URL"],
        ))
Example #33
    def _precache(self, info, force=False):
        """Calculate prermitions for user actions.

        FIXME: compatibility layer only !!!
        """
        CFG_BIBAUTHORID_ENABLED = current_app.config.get(
            'CFG_BIBAUTHORID_ENABLED', False)
        # get authorization key
        acc_key = self.get_acc_key()
        acc = cache.get(acc_key)
        if not force and acc_key is not None and acc is not None:
            return acc

        # FIXME: acc_authorize_action should use flask request directly
        user_info = info
        user_info.update(self.req)

        from invenio.legacy.webuser import isUserSubmitter, isUserReferee, \
            isUserAdmin, isUserSuperAdmin
        from invenio.modules.access.engine import acc_authorize_action
        from invenio.modules.access.control import acc_get_role_id, \
            acc_is_user_in_role
        from invenio.modules.search.utils import \
            get_permitted_restricted_collections

        data = {}
        data['precached_permitted_restricted_collections'] = \
            get_permitted_restricted_collections(user_info)
        data['precached_usebaskets'] = acc_authorize_action(
            user_info, 'usebaskets')[0] == 0
        data['precached_useloans'] = acc_authorize_action(
            user_info, 'useloans')[0] == 0
        data['precached_usegroups'] = acc_authorize_action(
            user_info, 'usegroups')[0] == 0
        data['precached_usealerts'] = acc_authorize_action(
            user_info, 'usealerts')[0] == 0
        data['precached_usemessages'] = acc_authorize_action(
            user_info, 'usemessages')[0] == 0
        data['precached_usestats'] = acc_authorize_action(
            user_info, 'runwebstatadmin')[0] == 0
        try:
            data['precached_viewsubmissions'] = isUserSubmitter(user_info)
        except Exception:
            data['precached_viewsubmissions'] = None
        data['precached_useapprove'] = isUserReferee(user_info)
        data['precached_useadmin'] = isUserAdmin(user_info)
        data['precached_usesuperadmin'] = isUserSuperAdmin(user_info)
        data['precached_canseehiddenmarctags'] = acc_authorize_action(
            user_info, 'runbibedit')[0] == 0
        usepaperclaim = False
        usepaperattribution = False
        viewclaimlink = False

        if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
                user_info, acc_get_role_id("paperclaimviewers"))):
            usepaperclaim = True

        if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
                user_info, acc_get_role_id("paperattributionviewers"))):
            usepaperattribution = True

        viewlink = False
        try:
            viewlink = session['personinfo']['claim_in_process']
        except (KeyError, TypeError):
            pass

        if (current_app.config.get('CFG_BIBAUTHORID_ENABLED') and
           usepaperattribution and viewlink):
            viewclaimlink = True

#       if (CFG_BIBAUTHORID_ENABLED
#               and ((usepaperclaim or usepaperattribution)
#               and acc_is_user_in_role(
#                   data, acc_get_role_id("paperattributionlinkviewers")))):
#           viewclaimlink = True

        data['precached_viewclaimlink'] = viewclaimlink
        data['precached_usepaperclaim'] = usepaperclaim
        data['precached_usepaperattribution'] = usepaperattribution

        timeout = current_app.config.get(
            'CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT', 0)*3600
        cache.set(acc_key, data,
                  timeout=timeout)
        return data
Example #34
def format_element(bfo, recid=None):
    key = action_key(recid, 'doi')
    cache_action = cache.get(key)
    return cache_action
Example #35
 def _get(self, id):
     value = cache.get(self._prefix + id)
     if value is None:
         raise KeyError()
     return value
Example #36
 def load_from_storage(self, sid):
     return cache.get(self.generate_key(sid))
Example #37
 def _keys(self):
     return cache.get(self._prefix + '::keys') or set()
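_keys() keeps a registry of live keys under a single '<prefix>::keys' entry,
because a plain key-value cache cannot enumerate its own keys. A sketch of
the writer side such a registry implies (a hypothetical companion method):

 def _register_key(self, id):
     # Hypothetical companion to _keys(): the registry set has to be read,
     # updated and written back as a whole on every insert.
     keys = cache.get(self._prefix + '::keys') or set()
     keys.add(id)
     cache.set(self._prefix + '::keys', keys)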
Example #40
def curation_action(recid, ucoll_id=None):
    """Determine if curation action is underway."""
    return cache.get("community_curate:%s_%s" % (ucoll_id, recid))
Example #41
 def _get(self, id):
     value = cache.get(self._prefix + id)
     if value is None:
         raise KeyError()
     return value
Example #44
    def _precache(self, info, force=False):
        """Calculate permissions for user actions.

        FIXME: compatibility layer only !!!
        """
        CFG_BIBAUTHORID_ENABLED = current_app.config.get(
            'CFG_BIBAUTHORID_ENABLED', False)
        # get authorization key
        acc_key = self.get_acc_key()
        acc = cache.get(acc_key)
        if not force and acc_key is not None and acc is not None:
            return acc

        # FIXME: acc_authorize_action should use flask request directly
        user_info = info
        user_info.update(self.req)

        from invenio.legacy.webuser import isUserSubmitter, isUserReferee, \
            isUserAdmin, isUserSuperAdmin
        from invenio.modules.access.engine import acc_authorize_action
        from invenio.modules.access.control import acc_get_role_id, \
            acc_is_user_in_role
        from invenio_search.utils import \
            get_permitted_restricted_collections
        from invenio_deposit.cache import \
            get_authorized_deposition_types

        data = {}
        data['precached_permitted_restricted_collections'] = \
            get_permitted_restricted_collections(user_info)
        data['precached_allowed_deposition_types'] = \
            get_authorized_deposition_types(user_info)
        data['precached_useloans'] = acc_authorize_action(
            user_info, 'useloans')[0] == 0
        data['precached_usegroups'] = acc_authorize_action(
            user_info, 'usegroups')[0] == 0
        data['precached_usemessages'] = acc_authorize_action(
            user_info, 'usemessages')[0] == 0
        try:
            data['precached_viewsubmissions'] = isUserSubmitter(user_info)
        except Exception:
            data['precached_viewsubmissions'] = None
        data['precached_useapprove'] = isUserReferee(user_info)
        data['precached_useadmin'] = isUserAdmin(user_info)
        data['precached_usesuperadmin'] = isUserSuperAdmin(user_info)
        data['precached_canseehiddenmarctags'] = acc_authorize_action(
            user_info, 'runbibedit')[0] == 0
        usepaperclaim = False
        usepaperattribution = False
        viewclaimlink = False

        if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
                user_info, acc_get_role_id("paperclaimviewers"))):
            usepaperclaim = True

        if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(
                user_info, acc_get_role_id("paperattributionviewers"))):
            usepaperattribution = True

        viewlink = False
        try:
            viewlink = session['personinfo']['claim_in_process']
        except (KeyError, TypeError):
            pass

        if (current_app.config.get('CFG_BIBAUTHORID_ENABLED') and
           usepaperattribution and viewlink):
            viewclaimlink = True

#       if (CFG_BIBAUTHORID_ENABLED
#               and ((usepaperclaim or usepaperattribution)
#               and acc_is_user_in_role(
#                   data, acc_get_role_id("paperattributionlinkviewers")))):
#           viewclaimlink = True

        data['precached_viewclaimlink'] = viewclaimlink
        data['precached_usepaperclaim'] = usepaperclaim
        data['precached_usepaperattribution'] = usepaperattribution

        timeout = current_app.config.get(
            'CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT', 0)*3600
        cache.set(acc_key, data,
                  timeout=timeout)
        return data