def journal(self, journal_id, lock_journal=False, lock_account=None, lock_timeout=None):
    """
    Fetch a journal by its id and, if asked, take out a lock on it.

    Per the original documentation, the lock call may raise a Locked exception if the
    lock is requested but can't be obtained.

    :param journal_id: the id of the journal
    :param lock_journal: should we lock the resource on retrieval
    :param lock_account: which account is doing the locking?  Must be present if lock_journal=True
    :param lock_timeout: how long to lock the resource for.  May be None, in which case it will default
    :return: Tuple of (Journal Object, Lock Object); the lock is None when no lock was taken
    """
    # check the shape of the incoming arguments before touching the data layer
    rules = [
        {"arg": journal_id, "allow_none": False, "arg_name": "journal_id"},
        {"arg": lock_journal, "instance": bool, "allow_none": False, "arg_name": "lock_journal"},
        {"arg": lock_account, "instance": models.Account, "allow_none": True, "arg_name": "lock_account"},
        {"arg": lock_timeout, "instance": int, "allow_none": True, "arg_name": "lock_timeout"}
    ]
    argvalidate("journal", rules, exceptions.ArgumentException)

    record = models.Journal.pull(journal_id)

    # no lock to take: either nothing was found, or the caller didn't ask for one
    if record is None or not lock_journal:
        return record, None

    # a lock was requested on a journal that exists, so we must know who is locking it
    if lock_account is None:
        raise exceptions.ArgumentException("If you specify lock_journal on journal retrieval, you must also provide lock_account")

    return record, lock.lock("journal", journal_id, lock_account.id, lock_timeout)
def get_duplicates(self, article, owner=None, max_results=10):
    """
    Get all known duplicates of an article, most recently updated first

    If the owner id is provided, this will limit the search to duplicates owned by that owner

    :param article: the article (models.Article) to look for duplicates of
    :param owner: optional owner id; handed on to discover_duplicates
    :param max_results: maximum number of articles to return; also handed to discover_duplicates
        as its per-match-type result limit
    :return: list of distinct articles sorted newest -> oldest by last_updated, or [] if none were found
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("get_duplicates", [
        {"arg": article, "instance" : models.Article, "allow_none" : False, "arg_name" : "article"},
        {"arg" : owner, "instance" : unicode, "allow_none" : True, "arg_name" : "owner"}
    ], exceptions.ArgumentException)

    possible_articles_dict = self.discover_duplicates(article, owner, max_results)
    if not possible_articles_dict:
        return []

    # We don't need the details of duplicate types, so walk all the lists together.
    # An article may fulfil more than one duplication criteria, so keep only the first
    # occurrence of each id.  A set keeps the membership test cheap, and the loop variable
    # is deliberately not called "article" so the parameter is never rebound.
    seen_ids = set()
    possible_articles = []
    for dup_type in possible_articles_dict.values():
        for candidate in dup_type:
            if candidate.id not in seen_ids:
                seen_ids.add(candidate.id)
                possible_articles.append(candidate)

    # Sort the articles newest -> oldest by last_updated so we can get the most recent at [0]
    possible_articles.sort(key=lambda x: datetime.strptime(x.last_updated, "%Y-%m-%dT%H:%M:%SZ"), reverse=True)

    return possible_articles[:max_results]
def get_duplicate(self, article, owner=None):
    """
    Return the single duplicate of the supplied article, if there is one.

    If the owner id is provided, this will limit the search to duplicates owned by that owner

    :param article: the article to find a duplicate of
    :param owner: optional owner id, handed on to get_duplicates
    :return: the duplicate article, or None if there is no duplicate
    :raises exceptions.ArticleMergeConflict: if more than one duplicate is found
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": unicode, "allow_none": True, "arg_name": "owner"}
    ]
    argvalidate("get_duplicate", rules, exceptions.ArgumentException)

    # asking for two results is enough to tell "exactly one" apart from "several"
    matches = self.get_duplicates(article, owner, max_results=2)
    if len(matches) > 1:
        raise exceptions.ArticleMergeConflict(Messages.EXCEPTION_ARTICLE_MERGE_CONFLICT)

    return matches.pop() if matches else None
def application(self, application_id, lock_application=False, lock_account=None, lock_timeout=None):
    """
    Function to retrieve an application by its id, and to optionally lock the resource

    :param application_id: the id of the application
    :param lock_application: should we lock the resource on retrieval
    :param lock_account: which account is doing the locking?  Must be present if lock_application=True
    :param lock_timeout: how long to lock the resource for.  May be none, in which case it will default
    :return: Tuple of (Suggestion Object, Lock Object); the lock is None when no lock was taken
    :raises exceptions.ArgumentException: if the application was found and a lock was requested,
        but no lock_account was supplied
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # (the lock_application entry is labelled with its own name, not "lock_journal", so that
    # anything reported against it points at the right parameter)
    argvalidate("application", [
        {"arg": application_id, "allow_none" : False, "arg_name" : "application_id"},
        {"arg": lock_application, "instance" : bool, "allow_none" : False, "arg_name" : "lock_application"},
        {"arg": lock_account, "instance" : models.Account, "allow_none" : True, "arg_name" : "lock_account"},
        {"arg": lock_timeout, "instance" : int, "allow_none" : True, "arg_name" : "lock_timeout"}
    ], exceptions.ArgumentException)

    # pull the application from the database
    application = models.Suggestion.pull(application_id)

    # if we've retrieved the application, and a lock is requested, request it
    the_lock = None
    if application is not None and lock_application:
        if lock_account is not None:
            the_lock = lock.lock(constants.LOCK_APPLICATION, application_id, lock_account.id, lock_timeout)
        else:
            raise exceptions.ArgumentException("If you specify lock_application on application retrieval, you must also provide lock_account")

    return application, the_lock
def get_duplicates(self, article, max_results=10):
    """
    Get all known duplicates of an article, most recently updated first

    :param article: the article (models.Article) to look for duplicates of
    :param max_results: maximum number of articles to return; also handed to discover_duplicates
        as its second positional argument
    :return: list of distinct articles sorted newest -> oldest by last_updated, or [] if none were found
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("get_duplicates", [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
    ], exceptions.ArgumentException)

    possible_articles_dict = self.discover_duplicates(article, max_results)
    if not possible_articles_dict:
        return []

    # We don't need the details of duplicate types, so walk all the lists together.
    # An article may fulfil more than one duplication criteria, so keep only the first
    # occurrence of each id; a set keeps the membership test cheap.
    seen_ids = set()
    possible_articles = []
    for dup_type in possible_articles_dict.values():
        for candidate in dup_type:
            if candidate.id not in seen_ids:
                seen_ids.add(candidate.id)
                possible_articles.append(candidate)

    # Sort the articles newest -> oldest by last_updated so we can get the most recent at [0]
    possible_articles.sort(key=lambda x: datetime.strptime(x.last_updated, "%Y-%m-%dT%H:%M:%SZ"), reverse=True)

    return possible_articles[:max_results]
def patch_application(self, target, source):
    """
    Fill gaps in the target application using values from the source application.

    Only values the target lacks are copied: article_metadata when the target's is None, and
    the "count" / "url" entries of articles_last_year when the target's are None.

    :param target: the application (models.Suggestion) to be patched; modified in place
    :param source: the application (models.Suggestion) to take missing values from
    :return: None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("patch_application", [
        {"arg": target, "instance": models.Suggestion, "allow_none": False, "arg_name": "target"},
        {"arg": source, "instance": models.Suggestion, "allow_none": False, "arg_name": "source"}
    ], exceptions.ArgumentException)

    # NOTE(review): assumes app.logger is a standard logging.Logger.  debug() does its own level
    # check, so no isEnabledFor guard is needed; the previous guard passed the string "debug"
    # where the logging API expects a numeric level.
    app.logger.debug("Entering patch_application")

    if target.article_metadata is None:
        target.article_metadata = source.article_metadata

    saly = source.articles_last_year
    # if the source has no articles_last_year there is nothing to copy across, so leave the target alone
    if saly is not None:
        taly = target.articles_last_year
        if taly is None:
            taly = {}
        if taly.get("count") is None:
            taly["count"] = saly.get("count")
        if taly.get("url") is None:
            taly["url"] = saly.get("url")
        target.set_articles_last_year(taly.get("count"), taly.get("url"))

    app.logger.debug("Completed patch_application")
def can_edit_application(self, account, application):
    """
    Decide whether the account may edit the update request application.

    Super users are always allowed.  Anyone else must hold the "publisher" role, own the
    application, and the application must be in one of the editable statuses.

    :param account: the account doing the action
    :param application: the application the account wants to edit
    :return: True if the account is allowed; otherwise an exception is raised
    :raises exceptions.AuthoriseException: with reason WRONG_ROLE, NOT_OWNER or WRONG_STATUS
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
    ]
    argvalidate("can_edit_update_request", rules, exceptions.ArgumentException)

    # everyone except the super user has to pass each of the checks below, in this order
    if not account.is_super:
        if not account.has_role("publisher"):
            raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.WRONG_ROLE)

        if account.id != application.owner:
            raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.NOT_OWNER)

        editable_statuses = (
            constants.APPLICATION_STATUS_PENDING,
            constants.APPLICATION_STATUS_UPDATE_REQUEST,
            constants.APPLICATION_STATUS_REVISIONS_REQUIRED,
        )
        if application.application_status not in editable_statuses:
            raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.WRONG_STATUS)

    return True
def can_view_application(self, account, application):
    """
    Is the given account allowed to view the update request application

    Super users are always allowed.  Anyone else must hold the "publisher" role and be the
    owner of the application.

    :param account: the account doing the action
    :param application: the application the account wants to view
    :return: True if the account is allowed; otherwise an exception is raised
    :raises exceptions.AuthoriseException: with reason WRONG_ROLE or NOT_OWNER
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # (labelled with this function's own name rather than the one it was copied from)
    argvalidate("can_view_application", [
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
    ], exceptions.ArgumentException)

    # if this is the super user, they have all rights
    if account.is_super:
        return True

    if not account.has_role("publisher"):
        raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.WRONG_ROLE)

    if account.id != application.owner:
        raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.NOT_OWNER)

    return True
def discover_duplicates(self, article, owner=None, results_per_match_type=10):
    """
    Find duplicates of the article, grouped by the criterion that matched.

    If the owner id is provided, the ISSNs for that owner are looked up and passed to the
    duplicate queries.

    :param article: the article to find duplicates of
    :param owner: optional owner id
    :param results_per_match_type: size limit passed to each duplicate query
    :return: dict which may contain "doi" and/or "fulltext" lists of articles (excluding the
        supplied article itself), or None if neither list has any entries
    :raises exceptions.DuplicateArticleException: if the article has neither a DOI nor a fulltext url
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": unicode, "allow_none": True, "arg_name": "owner"}
    ]
    argvalidate("discover_duplicates", rules, exceptions.ArgumentException)

    # Only an owner gives us ISSNs to narrow the queries with
    issns = models.Journal.issns_by_owner(owner) if owner is not None else []

    # The bibjson is fetched here as it always has been, although nothing below reads it;
    # the call is kept because its side effects (if any) are not visible from this function.
    _bibjson = article.bibjson()

    # Matches are recorded per criterion.  The identifiers are pretty reliable; recording
    # every hit catches cases where there are already duplicates in the data.
    matches = {}

    # First criterion: the normalised DOI (only usable if it is a non-empty string)
    doi = article.get_normalised_doi()
    if doi is not None and isinstance(doi, basestring) and doi != '':
        hits = models.Article.duplicates(issns=issns, doi=doi, size=results_per_match_type)
        if len(hits) > 0:
            matches['doi'] = [hit for hit in hits if hit.id != article.id]

    # Second criterion: the normalised fulltext url
    fulltext = article.get_normalised_fulltext()
    if fulltext is not None:
        hits = models.Article.duplicates(issns=issns, fulltexts=fulltext, size=results_per_match_type)
        if len(hits) > 0:
            matches['fulltext'] = [hit for hit in hits if hit.id != article.id]

    # With no identifier at all we could not look for anything
    if doi is None and fulltext is None:
        raise exceptions.DuplicateArticleException(Messages.EXCEPTION_DETECT_DUPLICATE_NO_ID)

    # we have found something only if at least one criterion kept a non-empty list
    if any(matches.values()):
        return matches
    return None
def is_legitimate_owner(self, article, owner):
    """
    Work out whether the owner id is the sole owner of the journals the article's ISSNs point to.

    :param article: an article model
    :param owner: string account ID
    :return: True or False
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": unicode, "allow_none": False, "arg_name": "owner"}
    ]
    argvalidate("is_legitimate_owner", rules, exceptions.ArgumentException)

    # collect the print and electronic issns from the article
    bibjson = article.bibjson()
    article_issns = bibjson.get_identifiers(bibjson.P_ISSN)
    article_issns += bibjson.get_identifiers(bibjson.E_ISSN)

    # look each issn up in the index; for every journal found, record the journal's issns
    # against its owner.  The keys of this dict are therefore the distinct owners.
    issns_by_owner = {}
    for issn in article_issns:
        journals = models.Journal.find_by_issn(issn)
        if journals is None or len(journals) == 0:
            continue
        for found in journals:
            issns_by_owner.setdefault(found.owner, []).extend(found.bibjson().issns())

    # no owner means we can't confirm; several owners means ownership is confused;
    # and a single owner has to be the one we were asked about
    if len(issns_by_owner) != 1 or owner not in issns_by_owner:
        return False

    # single owner must still know of all supplied issns
    known_issns = set(issns_by_owner[owner])
    return all(issn in known_issns for issn in article_issns)
def is_legitimate_owner(self, article, owner):
    """
    Work out whether the owner id is the sole owner of the journals the article's ISSNs point to.

    :param article: an article model
    :param owner: string account ID
    :return: True or False
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": str, "allow_none": False, "arg_name": "owner"}
    ]
    argvalidate("is_legitimate_owner", rules, exceptions.ArgumentException)

    # collect the print and electronic issns from the article
    bibjson = article.bibjson()
    article_issns = bibjson.get_identifiers(bibjson.P_ISSN)
    article_issns += bibjson.get_identifiers(bibjson.E_ISSN)

    # look each issn up in the index; for every journal found, note its owner and
    # remember which issns that owner's journals carry
    distinct_owners = set()
    issns_by_owner = {}
    for issn in article_issns:
        journals = models.Journal.find_by_issn(issn)
        if journals is None or len(journals) == 0:
            continue
        for found in journals:
            distinct_owners.add(found.owner)
            issns_by_owner.setdefault(found.owner, []).extend(found.bibjson().issns())

    # zero owners: can't confirm.  More than one: ownership of this article is confused.
    if len(distinct_owners) != 1:
        return False

    # the one owner we found has to be the owner we were asked about
    if owner not in distinct_owners:
        return False

    # single owner must still know of all supplied issns
    known_issns = set(issns_by_owner[owner])
    for issn in article_issns:
        if issn not in known_issns:
            return False
    return True
def create_article(self, article, account, duplicate_check=True, merge_duplicate=True, limit_to_account=True, add_journal_info=False, dry_run=False):
    """
    Create a single article in the database.

    Duplicates are checked for and merged as requested, and the outcome is reported in the
    same dictionary shape that batch_create_articles consumes.

    :param article: The article to be created
    :param account: The account creating the article
    :param duplicate_check: Whether to check for duplicates in the database
    :param merge_duplicate: Whether to merge duplicate if found.  If set to False, may result in a DuplicateArticleException
    :param limit_to_account: Whether to limit create to when the account owns the journal to which the article belongs
    :param add_journal_info: Should we fetch the journal info and attach it to the article before save?
    :param dry_run: Whether to actually save, or if this is just to either see if it would work, or to prep for a batch ingest
    :return: dict with keys success, fail, update, new, shared, unowned, unmatched
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": duplicate_check, "instance": bool, "allow_none": False, "arg_name": "duplicate_check"},
        {"arg": merge_duplicate, "instance": bool, "allow_none": False, "arg_name": "merge_duplicate"},
        {"arg": limit_to_account, "instance": bool, "allow_none": False, "arg_name": "limit_to_account"},
        {"arg": add_journal_info, "instance": bool, "allow_none": False, "arg_name": "add_journal_info"},
        {"arg": dry_run, "instance": bool, "allow_none": False, "arg_name": "dry_run"}
    ]
    argvalidate("create_article", rules, exceptions.ArgumentException)

    # an account that does not legitimately own the article gets a failure report back,
    # detailing the ownership status of the article's issns
    if limit_to_account and not self.is_legitimate_owner(article, account.id):
        _owned, shared, unowned, unmatched = self.issn_ownership_status(article, account.id)
        return {
            "success": 0,
            "fail": 1,
            "update": 0,
            "new": 0,
            "shared": shared,
            "unowned": unowned,
            "unmatched": unmatched
        }

    # before saving, work out whether this is a brand new article or an update of an existing one
    is_update = 0
    duplicate = self.get_duplicate(article, account.id) if duplicate_check else None
    if duplicate is not None:
        if not merge_duplicate:
            raise exceptions.DuplicateArticleException()
        is_update = 1
        article.merge(duplicate)    # merge will take the old id, so this will overwrite

    if add_journal_info:
        article.add_journal_metadata()

    # finally, save the new article (unless we were only asked to rehearse)
    if not dry_run:
        article.save()

    return {
        "success": 1,
        "fail": 0,
        "update": is_update,
        "new": 1 - is_update,
        "shared": set(),
        "unowned": set(),
        "unmatched": set()
    }
def application(self, application_id, lock_application=False, lock_account=None, lock_timeout=None):
    """
    Function to retrieve an application by its id, and to optionally lock the resource

    :param application_id: the id of the application
    :param lock_application: should we lock the resource on retrieval
    :param lock_account: which account is doing the locking?  Must be present if lock_application=True
    :param lock_timeout: how long to lock the resource for.  May be none, in which case it will default
    :return: Tuple of (Suggestion Object, Lock Object); the lock is None when no lock was taken
    :raises exceptions.ArgumentException: if the application was found and a lock was requested,
        but no lock_account was supplied
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # (the lock_application entry is labelled with its own name, not "lock_journal", so that
    # anything reported against it points at the right parameter)
    argvalidate("application", [{
        "arg": application_id,
        "allow_none": False,
        "arg_name": "application_id"
    }, {
        "arg": lock_application,
        "instance": bool,
        "allow_none": False,
        "arg_name": "lock_application"
    }, {
        "arg": lock_account,
        "instance": models.Account,
        "allow_none": True,
        "arg_name": "lock_account"
    }, {
        "arg": lock_timeout,
        "instance": int,
        "allow_none": True,
        "arg_name": "lock_timeout"
    }], exceptions.ArgumentException)

    # pull the application from the database
    application = models.Suggestion.pull(application_id)

    # if we've retrieved the application, and a lock is requested, request it
    the_lock = None
    if application is not None and lock_application:
        if lock_account is not None:
            the_lock = lock.lock(constants.LOCK_APPLICATION, application_id,
                                 lock_account.id, lock_timeout)
        else:
            raise exceptions.ArgumentException(
                "If you specify lock_application on application retrieval, you must also provide lock_account"
            )

    return application, the_lock
def discover_duplicates(self, article, results_per_match_type=10, include_article=True):
    """
    Find duplicates of the article, grouped by the criterion that matched.

    :param article: the article to find duplicates of
    :param results_per_match_type: size limit passed to each duplicate query
    :param include_article: if True, query results are kept as they come back; if False,
        results whose id equals the supplied article's id are dropped
    :return: dict which may contain "doi" and/or "fulltext" lists of articles, or None if
        neither list has any entries
    :raises exceptions.DuplicateArticleException: if the article has neither a DOI nor a fulltext url
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
    ]
    argvalidate("discover_duplicates", rules, exceptions.ArgumentException)

    def _candidates(hits):
        # apply the include_article choice to one set of query results
        if include_article:
            return list(hits)
        return [hit for hit in hits if hit.id != article.id]

    # Matches are recorded per criterion.  The identifiers are pretty reliable; recording
    # every hit catches cases where there are already duplicates in the data.
    matches = {}

    # First criterion: the normalised DOI (only usable if it is a non-empty string)
    doi = article.get_normalised_doi()
    if doi is not None and isinstance(doi, str) and doi != '':
        hits = models.Article.duplicates(doi=doi, size=results_per_match_type)
        if len(hits) > 0:
            matches['doi'] = _candidates(hits)

    # Second criterion: the normalised fulltext url
    fulltext = article.get_normalised_fulltext()
    if fulltext is not None:
        hits = models.Article.duplicates(fulltexts=fulltext, size=results_per_match_type)
        if len(hits) > 0:
            matches['fulltext'] = _candidates(hits)

    # With no identifier at all we could not look for anything
    if doi is None and fulltext is None:
        raise exceptions.DuplicateArticleException(Messages.EXCEPTION_DETECT_DUPLICATE_NO_ID)

    # we have found something only if at least one criterion kept a non-empty list
    if any(matches.values()):
        return matches
    return None
def get_duplicate(self, article, owner=None):
    """
    Return the single duplicate of the supplied article, if there is one.

    If the owner id is provided, this will limit the search to duplicates owned by that owner

    :param article: the article to find a duplicate of
    :param owner: optional owner id, handed on to get_duplicates
    :return: the duplicate article, or None if there is no duplicate
    :raises exceptions.ArticleMergeConflict: if more than one duplicate is found
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": unicode, "allow_none": True, "arg_name": "owner"}
    ]
    argvalidate("get_duplicate", rules, exceptions.ArgumentException)

    # two results are all we need in order to detect a conflict
    found = self.get_duplicates(article, owner, max_results=2)

    if len(found) > 1:
        raise exceptions.ArticleMergeConflict(Messages.EXCEPTION_ARTICLE_MERGE_CONFLICT)

    if not found:
        return None
    return found.pop()
def patch_application(self, target, source):
    """
    Fill gaps in the target application using values from the source application.

    Only values the target lacks are copied: article_metadata when the target's is None, and
    the "count" / "url" entries of articles_last_year when the target's are None.

    :param target: the application (models.Suggestion) to be patched; modified in place
    :param source: the application (models.Suggestion) to take missing values from
    :return: None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("patch_application", [
        {"arg": target, "instance" : models.Suggestion, "allow_none" : False, "arg_name" : "target"},
        {"arg" : source, "instance" : models.Suggestion, "allow_none" : False, "arg_name" : "source"}
    ], exceptions.ArgumentException)

    # NOTE(review): assumes app.logger is a standard logging.Logger.  debug() does its own level
    # check, so no isEnabledFor guard is needed; the previous guard passed the string "debug"
    # where the logging API expects a numeric level.
    app.logger.debug("Entering patch_application")

    if target.article_metadata is None:
        target.article_metadata = source.article_metadata

    saly = source.articles_last_year
    # if the source has no articles_last_year there is nothing to copy across, so leave the target alone
    if saly is not None:
        taly = target.articles_last_year
        if taly is None:
            taly = {}
        if taly.get("count") is None:
            taly["count"] = saly.get("count")
        if taly.get("url") is None:
            taly["url"] = saly.get("url")
        target.set_articles_last_year(taly.get("count"), taly.get("url"))

    app.logger.debug("Completed patch_application")
def can_create_update_request(self, account, journal):
    """
    Decide whether the account may create an update request from the journal.

    Super users are always allowed.  Anyone else must hold the "publisher" role and be the
    owner of the journal.

    :param account: the account doing the action
    :param journal: the journal the account wants to create an update request from
    :return: True if the account is allowed; otherwise an exception is raised
    :raises exceptions.AuthoriseException: with reason WRONG_ROLE or NOT_OWNER
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": journal, "instance": models.Journal, "allow_none": False, "arg_name": "journal"},
    ]
    argvalidate("can_create_update_request", rules, exceptions.ArgumentException)

    # everyone except the super user has to pass both checks, role first
    if not account.is_super:
        if not account.has_role("publisher"):
            raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.WRONG_ROLE)
        if account.id != journal.owner:
            raise exceptions.AuthoriseException(reason=exceptions.AuthoriseException.NOT_OWNER)

    return True
def can_create_update_request(self, account, journal):
    """
    Decide whether the account may create an update request from the journal.

    Super users are always allowed.  Anyone else must hold the "publisher" role and be the
    owner of the journal.

    :param account: the account doing the action
    :param journal: the journal the account wants to create an update request from
    :return: True if the account is allowed; otherwise an exception is raised
    :raises exceptions.AuthoriseException: with reason WRONG_ROLE or NOT_OWNER
    """
    # check the incoming arguments before doing any work
    rules = [
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": journal, "instance": models.Journal, "allow_none": False, "arg_name": "journal"},
    ]
    argvalidate("can_create_update_request", rules, exceptions.ArgumentException)

    # the super user has all rights
    if account.is_super:
        return True

    # work out the first reason (if any) to refuse: role is checked before ownership
    refusal = None
    if not account.has_role("publisher"):
        refusal = exceptions.AuthoriseException.WRONG_ROLE
    elif account.id != journal.owner:
        refusal = exceptions.AuthoriseException.NOT_OWNER

    if refusal is not None:
        raise exceptions.AuthoriseException(reason=refusal)

    return True
def update_request_for_journal(self, journal_id, account=None, lock_timeout=None):
    """
    Obtain an update request application object for the journal with the given journal_id

    An update request may either be loaded from the database, if it already exists, or created
    in-memory if it has not previously been created.

    If an account is provided, this will validate that the account holder is allowed to make
    the conversion from journal to application, if a conversion is required.

    When this request runs, the journal will be locked to the provided account if an account is
    given.  If the application is loaded from the database, this will also be locked for the
    account holder.

    :param journal_id: the id of the journal to get an update request for
    :param account: the account requesting the update request; if None no locks are taken
    :param lock_timeout: how long to lock the resources for; passed to every lock taken here.
        May be None, which is handed on to lock.lock unchanged
    :return: a tuple of (Application Object, Journal Lock, Application Lock); all three are None
        if the journal does not exist or is not in DOAJ
    :raises exceptions.AuthoriseException: if the account may not edit the existing update request
    :raises lock.Locked: if a lock is needed on the existing update request or its journal but
        can't be obtained
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("update_request_for_journal", [
        {"arg": journal_id, "instance" : basestring, "allow_none" : False, "arg_name" : "journal_id"},
        {"arg" : account, "instance" : models.Account, "allow_none" : True, "arg_name" : "account"},
        {"arg" : lock_timeout, "instance" : int, "allow_none" : True, "arg_name" : "lock_timeout"}
    ], exceptions.ArgumentException)

    # NOTE(review): assumes app.logger is a standard logging.Logger.  debug() does its own level
    # check, so no isEnabledFor guard is needed; the previous guard passed the string "debug"
    # where the logging API expects a numeric level.
    app.logger.debug("Entering update_request_for_journal")

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # first retrieve the journal, and return empty if there isn't one.
    # We don't attempt to obtain a lock at this stage, as we want to check that the user is authorised first
    journal_lock = None
    journal, _ = journalService.journal(journal_id)
    if journal is None:
        app.logger.info("Request for journal {x} did not find anything in the database".format(x=journal_id))
        return None, None, None

    # if the journal is not in_doaj, we won't create an update request for it
    if not journal.is_in_doaj():
        app.logger.info("Request for journal {x} found it is not in_doaj; will not create update request".format(x=journal_id))
        return None, None, None

    # retrieve the latest application attached to this journal
    application_lock = None
    application = models.Suggestion.find_latest_by_current_journal(journal_id)

    # if no such application exists, create one in memory (this will check that the user is permitted to create one)
    # at the same time, create the lock for the journal.  This will throw an AuthorisedException or a Locked exception
    # (in that order of preference) if any problems arise.
    if application is None:
        app.logger.info("No existing update request for journal {x}; creating one".format(x=journal.id))
        application = journalService.journal_2_application(journal, account=account)
        lra_id = journal.latest_related_application_id()
        if lra_id is not None:
            lra, _ = self.application(lra_id)
            if lra is not None:
                self.patch_application(application, lra)
        if account is not None:
            # honour the caller's lock_timeout (previously accepted but never used)
            journal_lock = lock.lock("journal", journal_id, account.id, lock_timeout)

    # otherwise check that the user (if given) has the rights to edit the application
    # then lock the application and journal to the account.
    # If a lock cannot be obtained, unlock the journal and application before we return
    elif account is not None:
        try:
            authService.can_edit_application(account, application)
            application_lock = lock.lock("suggestion", application.id, account.id, lock_timeout)
            journal_lock = lock.lock("journal", journal_id, account.id, lock_timeout)
        except lock.Locked:
            # release whichever lock we did manage to take before passing the failure on
            if application_lock is not None:
                application_lock.delete()
            if journal_lock is not None:
                journal_lock.delete()
            raise
        except exceptions.AuthoriseException as e:
            msg = "Account {x} is not permitted to edit the current update request on journal {y}".format(x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

        app.logger.info("Using existing application {y} as update request for journal {x}".format(y=application.id, x=journal.id))

    app.logger.debug("Completed update_request_for_journal; return application object")

    return application, journal_lock, application_lock
def reject_application(self, application, account, provenance=True, note=None, manual_update=True):
    """
    Reject an application.  This will:
    * set the application status to "rejected" (if not already)
    * add the note to the application (if one is given)
    * remove the current_journal field, and move it to related_journal (if needed)
    * remove the current_application field from the related journal (if needed)
    * save the application
    * write a provenance record for the rejection (if requested)

    :param application: the application (models.Suggestion) to reject
    :param account: the account carrying out the rejection; must have the "reject_application" role
    :param provenance: whether to write a provenance record for the rejection
    :param note: optional note to add to the application
    :param manual_update: whether to record this as a manual update on the application
    :return: None
    :raises exceptions.AuthoriseException: if the account lacks the "reject_application" role
    :raises exceptions.SaveException: if saving the current journal or the application returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("reject_application", [
        {"arg": application, "instance" : models.Suggestion, "allow_none" : False, "arg_name" : "application"},
        {"arg" : account, "instance" : models.Account, "allow_none" : False, "arg_name" : "account"},
        {"arg" : provenance, "instance" : bool, "allow_none" : False, "arg_name" : "provenance"},
        {"arg" : note, "instance" : basestring, "allow_none" : True, "arg_name" : "note"},
        {"arg" : manual_update, "instance" : bool, "allow_none" : False, "arg_name" : "manual_update"}
    ], exceptions.ArgumentException)

    # NOTE(review): assumes app.logger is a standard logging.Logger.  debug() does its own level
    # check, so no isEnabledFor guard is needed; the previous guard passed the string "debug"
    # where the logging API expects a numeric level.
    app.logger.debug("Entering reject_application")

    journalService = DOAJ.journalService()

    # check we're allowed to carry out this action
    if not account.has_role("reject_application"):
        raise exceptions.AuthoriseException(message="This user is not allowed to reject applications", reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "rejected"
    if application.application_status != constants.APPLICATION_STATUS_REJECTED:
        application.set_application_status(constants.APPLICATION_STATUS_REJECTED)

    # add the note to the application
    if note is not None:
        application.add_note(note)

    # retrieve the id of the current journal if there is one
    cj_id = application.current_journal
    cj = None

    # if there is a current_journal record, remove it, and record
    # it as a related journal.  This will let us come back later and know
    # which journal record this was intended as an update against if needed.
    if cj_id is not None:
        cj, _ = journalService.journal(cj_id)
        application.remove_current_journal()
        if cj is not None:
            application.set_related_journal(cj_id)
            cj.remove_current_application()

    # if there is a current journal, we will have modified it above, so save it
    if cj is not None:
        saved = cj.save()
        if saved is None:
            raise exceptions.SaveException("Save on current_journal in reject_application failed")

    # if we were asked to record this as a manual update, record that on the application
    if manual_update:
        application.set_last_manual_update()

    saved = application.save()
    if saved is None:
        raise exceptions.SaveException("Save on application in reject_application failed")

    # record a provenance record that this action took place
    if provenance:
        models.Provenance.make(account, constants.PROVENANCE_STATUS_REJECTED, application)

    app.logger.debug("Completed reject_application")
def batch_create_articles(self, articles, account, duplicate_check=True, merge_duplicate=True,
                          limit_to_account=True, add_journal_info=False):
    """
    Create a batch of articles in a single operation.  Either every article is saved, or none is.

    Each article is first passed through create_article in dry-run mode; only when no failure has
    been reported for the whole batch are the articles actually saved.

    :param articles: The list of article objects
    :param account: The account creating the articles
    :param duplicate_check: Whether to check for duplicates in the batch and in the index
    :param merge_duplicate: Should duplicates be merged.  If set to False, this may raise a DuplicateArticleException
    :param limit_to_account: Should the ingest be limited only to articles for journals owned by the account.
        If set to True, may result in an IngestException
    :param add_journal_info: Should we fetch the journal info and attach it to the article before save?
    :return: a report on the state of the import:
        {success: x, fail: x, update: x, new: x, shared: [], unowned: [], unmatched: []}
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("batch_create_article", [
        {"arg": articles, "instance": list, "allow_none": False, "arg_name": "articles"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": duplicate_check, "instance": bool, "allow_none": False, "arg_name": "duplicate_check"},
        {"arg": merge_duplicate, "instance": bool, "allow_none": False, "arg_name": "merge_duplicate"},
        {"arg": limit_to_account, "instance": bool, "allow_none": False, "arg_name": "limit_to_account"},
        {"arg": add_journal_info, "instance": bool, "allow_none": False, "arg_name": "add_journal_info"}
    ], exceptions.ArgumentException)

    # 1. a batch that contains duplicates of itself is rejected outright
    if duplicate_check and self._batch_contains_duplicates(articles):
        rejected = {"success": 0, "fail": len(articles), "update": 0, "new": 0,
                    "shared": [], "unowned": [], "unmatched": []}
        raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_DUPLICATE, result=rejected)

    # 2. dry-run every article, accumulating the individual outcomes
    counts = {"success": 0, "fail": 0, "update": 0, "new": 0}
    issn_groups = {"shared": set(), "unowned": set(), "unmatched": set()}
    for candidate in articles:
        try:
            outcome = self.create_article(candidate, account,
                                          duplicate_check=duplicate_check,
                                          merge_duplicate=merge_duplicate,
                                          limit_to_account=limit_to_account,
                                          add_journal_info=add_journal_info,
                                          dry_run=True)
        except exceptions.ArticleMergeConflict:
            raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT)

        for counter in ("success", "fail", "update", "new"):
            counts[counter] += outcome.get(counter, 0)
        for group in ("shared", "unowned", "unmatched"):
            issn_groups[group].update(outcome.get(group, set()))

    report = {
        "success": counts["success"],
        "fail": counts["fail"],
        "update": counts["update"],
        "new": counts["new"],
        "shared": issn_groups["shared"],
        "unowned": issn_groups["unowned"],
        "unmatched": issn_groups["unmatched"]
    }

    # any failure in the batch means nothing gets saved
    if counts["fail"] != 0:
        raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_FAIL, result=report)

    # block on the final save only, so that when this method returns, all articles are
    # available in the index
    final_position = len(articles) - 1
    for position, candidate in enumerate(articles):
        candidate.save(blocking=(position == final_position))

    # return some stats on the import
    return report
def batch_create_articles(self, articles, account, duplicate_check=True, merge_duplicate=True,
                          limit_to_account=True, add_journal_info=False):
    """
    Create a batch of articles in a single operation: all of them are created/updated, or none are.

    The batch is checked for internal duplicates (when duplicate_check is set), then each article
    is run through create_article with dry_run=True.  The articles are saved only if the
    accumulated failure count is zero.

    :param articles: The list of article objects
    :param account: The account creating the articles
    :param duplicate_check: Whether to check for duplicates in the batch and in the index
    :param merge_duplicate: Should duplicates be merged.  If set to False, this may raise a DuplicateArticleException
    :param limit_to_account: Should the ingest be limited only to articles for journals owned by the account.
        If set to True, may result in an IngestException
    :param add_journal_info: Should we fetch the journal info and attach it to the article before save?
    :return: a report on the state of the import:
        {success: x, fail: x, update: x, new: x, shared: [], unowned: [], unmatched: []}
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("batch_create_article", [
        {"arg": articles, "instance": list, "allow_none": False, "arg_name": "articles"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": duplicate_check, "instance": bool, "allow_none": False, "arg_name": "duplicate_check"},
        {"arg": merge_duplicate, "instance": bool, "allow_none": False, "arg_name": "merge_duplicate"},
        {"arg": limit_to_account, "instance": bool, "allow_none": False, "arg_name": "limit_to_account"},
        {"arg": add_journal_info, "instance": bool, "allow_none": False, "arg_name": "add_journal_info"}
    ], exceptions.ArgumentException)

    # 1. dedupe the batch: duplicates inside the batch fail every article in it
    if duplicate_check and self._batch_contains_duplicates(articles):
        failed_report = {"success": 0, "fail": len(articles), "update": 0, "new": 0,
                         "shared": [], "unowned": [], "unmatched": []}
        raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_DUPLICATE,
                                         result=failed_report)

    # 2. check legitimate ownership by dry-running each article and summing the results
    numeric_keys = ("success", "fail", "update", "new")
    set_keys = ("shared", "unowned", "unmatched")
    tallies = dict((key, 0) for key in numeric_keys)
    collected = dict((key, set()) for key in set_keys)

    create_options = {
        "duplicate_check": duplicate_check,
        "merge_duplicate": merge_duplicate,
        "limit_to_account": limit_to_account,
        "add_journal_info": add_journal_info,
        "dry_run": True
    }
    for item in articles:
        try:
            single = self.create_article(item, account, **create_options)
        except exceptions.ArticleMergeConflict:
            raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_CONFLICT)

        for key in numeric_keys:
            tallies[key] += single.get(key, 0)
        for key in set_keys:
            collected[key].update(single.get(key, set()))

    report = {
        "success": tallies["success"],
        "fail": tallies["fail"],
        "update": tallies["update"],
        "new": tallies["new"],
        "shared": collected["shared"],
        "unowned": collected["unowned"],
        "unmatched": collected["unmatched"]
    }

    # a single failure aborts the whole batch before anything is saved
    if tallies["fail"] != 0:
        raise exceptions.IngestException(message=Messages.EXCEPTION_ARTICLE_BATCH_FAIL, result=report)

    # if there were no failures in the batch, then we can do the save; block on the final
    # save, so that when this method returns, all articles are available in the index
    last_index = len(articles) - 1
    for index, item in enumerate(articles):
        is_last = index == last_index
        item.save(blocking=is_last)

    # return some stats on the import
    return report
def create_article(self, article, account, duplicate_check=True, merge_duplicate=True,
                   limit_to_account=True, add_journal_info=False, dry_run=False, update_article_id=None):
    """
    Create an individual article in the database

    This method will check and merge any duplicates, and report back on successes and failures in a manner
    consistent with batch_create_articles.

    :param article: The article to be created
    :param account: The account creating the article
    :param duplicate_check: Whether to check for duplicates in the database
    :param merge_duplicate: Whether to merge duplicate if found.  If set to False, may result in a
        DuplicateArticleException
    :param limit_to_account: Whether to limit create to when the account owns the journal to which the
        article belongs
    :param add_journal_info: Should we fetch the journal info and attach it to the article before save?
    :param dry_run: Whether to actually save, or if this is just to either see if it would work, or to prep
        for a batch ingest
    :param update_article_id: The article id that it is supposed to be an update to; taken into consideration
        ONLY if duplicate_check == True and merge_duplicate == True
    :return: the dict returned by has_permissions, if it returns a dict; otherwise a report of the form
        {success: 1, fail: 0, update: x, new: 1 - x, shared: set(), unowned: set(), unmatched: set()}
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("create_article", [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": duplicate_check, "instance": bool, "allow_none": False, "arg_name": "duplicate_check"},
        {"arg": merge_duplicate, "instance": bool, "allow_none": False, "arg_name": "merge_duplicate"},
        {"arg": limit_to_account, "instance": bool, "allow_none": False, "arg_name": "limit_to_account"},
        {"arg": add_journal_info, "instance": bool, "allow_none": False, "arg_name": "add_journal_info"},
        {"arg": dry_run, "instance": bool, "allow_none": False, "arg_name": "dry_run"},
        {"arg": update_article_id, "instance": str, "allow_none": True, "arg_name": "update_article_id"}
    ], exceptions.ArgumentException)

    # quickly validate that the article is acceptable - it must have a DOI and/or a fulltext
    # this raises an exception if the article is not acceptable, containing all the relevant validation details
    self.is_acceptable(article)

    # a dict result from the permissions check is handed straight back to the caller as the report
    has_permissions_result = self.has_permissions(account, article, limit_to_account)
    if isinstance(has_permissions_result, dict):
        return has_permissions_result

    is_update = 0
    if duplicate_check:
        duplicate = self.get_duplicate(article)
        # Exceptions raised while preparing the update (the original handler named
        # DuplicateArticleException, ArticleMergeConflict and ConfigurationException) are left to
        # propagate unchanged; re-raising them by hand added nothing.
        if account.has_role("admin") and update_article_id is not None:
            # if update_article_id is None then treat as normal publisher upload
            # for testing by admin
            is_update = self._prepare_update_admin(article, duplicate, update_article_id, merge_duplicate)
        else:
            is_update = self._prepare_update_publisher(article, duplicate, merge_duplicate, account,
                                                       limit_to_account)

    if add_journal_info:
        article.add_journal_metadata()

    # finally, save the new article
    if not dry_run:
        article.save()

    return {"success": 1, "fail": 0, "update": is_update, "new": 1 - is_update,
            "shared": set(), "unowned": set(), "unmatched": set()}
def application_2_journal(self, application, manual_update=True):
    """
    Build a new models.Journal object from the given application.

    The journal is built in memory only; this method does not save it.  If the application has a
    current_journal that can be pulled, the new journal takes over that journal's id and created
    date, and inherits its notes, related applications and any contacts/editor/owner that the
    application itself did not supply.

    :param application: the models.Suggestion to convert
    :param manual_update: whether to record this as a manual update on the resulting journal
    :return: the models.Journal that was built
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("application_2_journal", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"}
    ], exceptions.ArgumentException)

    # logger.debug() performs its own level check, so no explicit isEnabledFor guard is needed
    app.logger.debug("Entering application_2_journal")

    # create a new blank journal record, which we can build up
    journal = models.Journal()

    # first thing is to copy the bibjson as-is wholesale, and set active=True
    abj = application.bibjson()
    journal.set_bibjson(abj)
    jbj = journal.bibjson()
    jbj.active = True

    # now carry over key administrative properties from the application itself
    # * contacts
    # * notes
    # * editor
    # * editor_group
    # * owner
    # * seal
    contacts = application.contacts()
    notes = application.notes

    for contact in contacts:
        journal.add_contact(contact.get("name"), contact.get("email"))
    if application.editor is not None:
        journal.set_editor(application.editor)
    if application.editor_group is not None:
        journal.set_editor_group(application.editor_group)
    for note in notes:
        journal.add_note(note.get("note"), note.get("date"))
    if application.owner is not None:
        journal.set_owner(application.owner)
    journal.set_seal(application.has_seal())

    # now relate the journal to the application and place it in_doaj
    journal.add_related_application(application.id, dates.now())
    journal.set_in_doaj(True)

    # if we've been called in the context of a manual update, record that
    if manual_update:
        journal.set_last_manual_update()

    # if this is an update to an existing journal, then we can also port information from
    # that journal
    if application.current_journal is not None:
        cj = models.Journal.pull(application.current_journal)
        if cj is not None:
            # carry the id and the created date
            journal.set_id(cj.id)
            journal.set_created(cj.created_date)

            # bring forward any notes from the old journal record
            for note in cj.notes:
                journal.add_note(note.get("note"), note.get("date"))

            # bring forward any related applications
            for r in cj.related_applications:
                journal.add_related_application(r.get("application_id"), r.get("date_accepted"),
                                                r.get("status"))

            # ignore any previously set bulk_upload reference

            # carry over any properties that are not already set from the application
            # * contact
            # * editor & editor_group (together or not at all)
            # * owner
            if len(journal.contacts()) == 0:
                for contact in cj.contacts():
                    journal.add_contact(contact.get("name"), contact.get("email"))
            if journal.editor is None and journal.editor_group is None:
                journal.set_editor(cj.editor)
                journal.set_editor_group(cj.editor_group)
            if journal.owner is None:
                journal.set_owner(cj.owner)

    app.logger.debug("Completing application_2_journal")

    return journal
def delete_application(self, application_id, account):
    """
    Function to delete an application, and all references to it in other objects (current and related journals)

    The application and all related journals need to be locked before this process can proceed, so you may get a
    lock.Locked exception

    :param application_id: the id of the application to delete
    :param account: the models.Account requesting the deletion; also used as the owner of the locks
    :return: None
    :raises exceptions.NoSuchObjectException: if no application with that id can be retrieved
    :raises exceptions.SaveException: if saving the current or related journal returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("delete_application", [
        {"arg": application_id, "instance": unicode, "allow_none": False, "arg_name": "application_id"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"}
    ], exceptions.ArgumentException)

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # get hold of a copy of the application.  If there isn't one, there is nothing to delete
    # and we signal that to the caller
    application, _ = self.application(application_id)
    if application is None:
        raise exceptions.NoSuchObjectException

    # determine if the user can edit the application
    authService.can_edit_application(account, application)

    # attempt to lock the record (this may raise a Locked exception)
    alock = lock.lock(constants.LOCK_APPLICATION, application_id, account.id)
    cjlock = None
    rjlock = None

    # Everything after the first lock is taken runs under a single try/finally, so that the locks
    # we hold are released whichever step fails (a Locked journal, a failed save, or anything else).
    try:
        # obtain the current journal, with associated lock
        current_journal = None
        if application.current_journal is not None:
            current_journal, cjlock = journalService.journal(application.current_journal,
                                                             lock_journal=True, lock_account=account)

        # obtain the related journal, with associated lock
        related_journal = None
        if application.related_journal is not None:
            related_journal, rjlock = journalService.journal(application.related_journal,
                                                             lock_journal=True, lock_account=account)

        # detach the application from its current journal
        if current_journal is not None:
            current_journal.remove_current_application()
            saved = current_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        # mark the application as deleted on its related journal, keeping any date_accepted on record
        if related_journal is not None:
            relation_record = related_journal.related_application_record(application_id)
            if relation_record is None:
                relation_record = {}
            related_journal.add_related_application(application_id, relation_record.get("date_accepted"),
                                                    "deleted")
            saved = related_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        application.delete()

    finally:
        for held_lock in (alock, cjlock, rjlock):
            if held_lock is not None:
                held_lock.delete()

    return
def csv(self, prune=True):
    """
    Generate the Journal CSV, store it in the "cache" store, and record its URL in the cache record.

    The file is first written to the temporary store, then copied to the main store; the temporary
    copy is deleted afterwards whether or not the copy succeeded.

    :param prune: whether to prune older journal CSV files from the cache container once the new
        file has been stored (prune_container is asked to keep 2)
    :return: Tuple of (url, action_register): the main store's URL for the new file, and the value
        returned by prune_container (an empty list when prune is falsy)
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("csv", [
        {"arg": prune, "allow_none": False, "arg_name": "prune"}
    ], exceptions.ArgumentException)

    filename = 'journalcsv__doaj_' + datetime.utcnow().strftime('%Y%m%d_%H%M') + '_utf8.csv'
    container_id = app.config.get("STORE_CACHE_CONTAINER")
    tmpStore = StoreFactory.tmp()
    out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)

    YES_NO = {True: 'Yes', False: 'No', None: '', '': ''}

    def _make_journals_csv(file_object):
        """
        Make a CSV file of information for all journals.
        :param file_object: a utf8 encoded file object.
        """
        cols = {}
        for j in models.Journal.all_in_doaj(page_size=100000):  # 10x how many journals we have right now
            assert isinstance(j, models.Journal)  # for pycharm type inspection
            bj = j.bibjson()
            # prefer the print ISSN, fall back to the electronic one; skip journals with neither
            issn = bj.get_one_identifier(idtype=bj.P_ISSN)
            if issn is None:
                issn = bj.get_one_identifier(idtype=bj.E_ISSN)
            if issn is None:
                continue

            kvs = Journal2QuestionXwalk.journal2question(j)
            meta_kvs = _get_doaj_meta_kvs(j)
            article_kvs = _get_article_kvs(j)
            cols[issn] = kvs + meta_kvs + article_kvs

        csvwriter = clcsv.UnicodeWriter(file_object)
        qs = None
        # rows are written in ISSN order; the header row is taken from the first journal written
        for i in sorted(cols):
            if qs is None:
                qs = [q for q, _ in cols[i]]
                csvwriter.writerow(qs)
            vs = [v for _, v in cols[i]]
            csvwriter.writerow(vs)

    def _get_doaj_meta_kvs(journal):
        """
        Get key, value pairs for some meta information we want from the journal object
        :param journal: a models.Journal
        :return: a list of (key, value) tuples for our metadata
        """
        kvs = [
            ("DOAJ Seal", YES_NO.get(journal.has_seal(), "")),
            ("Tick: Accepted after March 2014", YES_NO.get(journal.is_ticked(), "")),
            ("Added on Date", journal.created_date),
            ("Subjects", ' | '.join(journal.bibjson().lcc_paths()))
        ]
        return kvs

    def _get_article_kvs(journal):
        """
        Get key, value pairs for the journal's article statistics
        :param journal: a models.Journal
        :return: a list of (key, value) tuples built from journal.article_stats()
        """
        stats = journal.article_stats()
        kvs = [
            ("Number of Article Records", str(stats.get("total"))),
            ("Most Recent Article Added", stats.get("latest"))
        ]
        return kvs

    with codecs.open(out, 'wb', encoding='utf-8') as csvfile:
        _make_journals_csv(csvfile)

    mainStore = StoreFactory.get("cache")
    try:
        mainStore.store(container_id, filename, source_path=out)
        url = mainStore.url(container_id, filename)
    finally:
        tmpStore.delete_file(container_id, filename)  # don't delete the container, just in case someone else is writing to it

    action_register = []
    if prune:
        # the helpers are deliberately not called "sort"/"filter", to avoid shadowing the builtin filter
        def _newest_first(filelist):
            rx = "journalcsv__doaj_(.+?)_utf8.csv"
            return sorted(filelist,
                          key=lambda x: datetime.strptime(re.match(rx, x).group(1), '%Y%m%d_%H%M'),
                          reverse=True)

        def _is_journal_csv(filename):
            return filename.startswith("journalcsv__")

        action_register = prune_container(mainStore, container_id, _newest_first,
                                          filter=_is_journal_csv, keep=2)

    # update the ES record to point to the new file
    models.Cache.cache_csv(url)
    return url, action_register
def application_2_journal(self, application, manual_update=True):
    """
    Build a new models.Journal object from the given application.

    The journal is built in memory only; this method does not save it.  If the application has a
    current_journal that can be pulled, the new journal takes over that journal's id and created
    date, and inherits its notes, related applications and any contacts/editor/owner that the
    application itself did not supply.

    :param application: the models.Suggestion to convert
    :param manual_update: whether to record this as a manual update on the resulting journal
    :return: the models.Journal that was built
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("application_2_journal", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"}
    ], exceptions.ArgumentException)

    # logger.debug() performs its own level check, so no explicit isEnabledFor guard is needed
    app.logger.debug("Entering application_2_journal")

    # create a new blank journal record, which we can build up
    journal = models.Journal()

    # first thing is to copy the bibjson as-is wholesale, and set active=True
    abj = application.bibjson()
    journal.set_bibjson(abj)
    jbj = journal.bibjson()
    jbj.active = True

    # now carry over key administrative properties from the application itself
    # * contacts
    # * notes
    # * editor
    # * editor_group
    # * owner
    # * seal
    contacts = application.contacts()
    notes = application.notes

    for contact in contacts:
        journal.add_contact(contact.get("name"), contact.get("email"))
    if application.editor is not None:
        journal.set_editor(application.editor)
    if application.editor_group is not None:
        journal.set_editor_group(application.editor_group)
    for note in notes:
        journal.add_note(note.get("note"), note.get("date"))
    if application.owner is not None:
        journal.set_owner(application.owner)
    journal.set_seal(application.has_seal())

    # now relate the journal to the application and place it in_doaj
    journal.add_related_application(application.id, dates.now())
    journal.set_in_doaj(True)

    # if we've been called in the context of a manual update, record that
    if manual_update:
        journal.set_last_manual_update()

    # if this is an update to an existing journal, then we can also port information from
    # that journal
    if application.current_journal is not None:
        cj = models.Journal.pull(application.current_journal)
        if cj is not None:
            # carry the id and the created date
            journal.set_id(cj.id)
            journal.set_created(cj.created_date)

            # bring forward any notes from the old journal record
            for note in cj.notes:
                journal.add_note(note.get("note"), note.get("date"))

            # bring forward any related applications
            for r in cj.related_applications:
                journal.add_related_application(r.get("application_id"), r.get("date_accepted"),
                                                r.get("status"))

            # ignore any previously set bulk_upload reference

            # carry over any properties that are not already set from the application
            # * contact
            # * editor & editor_group (together or not at all)
            # * owner
            if len(journal.contacts()) == 0:
                for contact in cj.contacts():
                    journal.add_contact(contact.get("name"), contact.get("email"))
            if journal.editor is None and journal.editor_group is None:
                journal.set_editor(cj.editor)
                journal.set_editor_group(cj.editor_group)
            if journal.owner is None:
                journal.set_owner(cj.owner)

    app.logger.debug("Completing application_2_journal")

    return journal
def delete_application(self, application_id, account):
    """
    Function to delete an application, and all references to it in other objects (current and related journals)

    The application and all related journals need to be locked before this process can proceed, so you may get a
    lock.Locked exception

    :param application_id: the id of the application to delete
    :param account: the models.Account requesting the deletion; also used as the owner of the locks
    :return: None
    :raises exceptions.NoSuchObjectException: if no application with that id can be retrieved
    :raises exceptions.SaveException: if saving the current or related journal returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("delete_application", [
        {"arg": application_id, "instance": unicode, "allow_none": False, "arg_name": "application_id"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"}
    ], exceptions.ArgumentException)

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # get hold of a copy of the application.  If there isn't one, there is nothing to delete
    # and we signal that to the caller
    application, _ = self.application(application_id)
    if application is None:
        raise exceptions.NoSuchObjectException

    # determine if the user can edit the application
    authService.can_edit_application(account, application)

    # attempt to lock the record (this may raise a Locked exception)
    alock = lock.lock(constants.LOCK_APPLICATION, application_id, account.id)
    cjlock = None
    rjlock = None

    # Everything after the first lock is taken runs under a single try/finally, so that the locks
    # we hold are released whichever step fails (a Locked journal, a failed save, or anything else).
    try:
        # obtain the current journal, with associated lock
        current_journal = None
        if application.current_journal is not None:
            current_journal, cjlock = journalService.journal(application.current_journal,
                                                             lock_journal=True, lock_account=account)

        # obtain the related journal, with associated lock
        related_journal = None
        if application.related_journal is not None:
            related_journal, rjlock = journalService.journal(application.related_journal,
                                                             lock_journal=True, lock_account=account)

        # detach the application from its current journal
        if current_journal is not None:
            current_journal.remove_current_application()
            saved = current_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        # mark the application as deleted on its related journal, keeping any date_accepted on record
        if related_journal is not None:
            relation_record = related_journal.related_application_record(application_id)
            if relation_record is None:
                relation_record = {}
            related_journal.add_related_application(application_id, relation_record.get("date_accepted"),
                                                    "deleted")
            saved = related_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        application.delete()

    finally:
        for held_lock in (alock, cjlock, rjlock):
            if held_lock is not None:
                held_lock.delete()

    return
def accept_application(self, application, account, manual_update=True, provenance=True,
                       save_journal=True, save_application=True):
    """
    Take the given application and create the Journal object in DOAJ for it.

    The account provided must have permission to create journals from applications.

    :param application: The application to be converted
    :param account: The account doing the conversion
    :param manual_update: Whether to record this update as a manual update on both the application and journal objects
    :param provenance: Whether to write provenance records for this operation
    :param save_journal: Whether to save the journal that is produced
    :param save_application: Whether to save the application after it has been modified
    :return: The journal that was created
    :raises exceptions.AuthoriseException: if the account does not have the "accept_application" role
    :raises exceptions.SaveException: if a requested save of the journal or the application returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("accept_application", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"},
        {"arg": provenance, "instance": bool, "allow_none": False, "arg_name": "provenance"},
        {"arg": save_journal, "instance": bool, "allow_none": False, "arg_name": "save_journal"},
        {"arg": save_application, "instance": bool, "allow_none": False, "arg_name": "save_application"}
    ], exceptions.ArgumentException)

    # logger.debug() performs its own level check, so no explicit isEnabledFor guard is needed
    app.logger.debug("Entering accept_application")

    # ensure that the account holder has a suitable role
    if not account.has_role("accept_application"):
        raise exceptions.AuthoriseException(
            message="User {x} is not permitted to accept application {y}".format(x=account.id, y=application.id),
            reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "accepted"
    if application.application_status != constants.APPLICATION_STATUS_ACCEPTED:
        application.set_application_status(constants.APPLICATION_STATUS_ACCEPTED)

    # make the resulting journal (and save it if requested)
    j = self.application_2_journal(application, manual_update=manual_update)
    if save_journal is True:
        saved = j.save()
        if saved is None:
            raise exceptions.SaveException("Save of resulting journal in accept_application failed")

    # retrieve the id of the current journal if there is one
    cj = application.current_journal

    # if there is a current_journal record, remove it
    if cj is not None:
        application.remove_current_journal()

    # set the relationship with the journal
    application.set_related_journal(j.id)

    # if we were asked to record this as a manual update, record that on the application
    # (the journal is done implicitly above)
    if manual_update:
        application.set_last_manual_update()

    if provenance:
        # record the event in the provenance tracker
        models.Provenance.make(account, constants.PROVENANCE_STATUS_ACCEPTED, application)

    # save the application if requested; a None result is treated as a failed save, in the same
    # way as for the journal above
    if save_application is True:
        saved = application.save()
        if saved is None:
            raise exceptions.SaveException("Save of application in accept_application failed")

    app.logger.debug("Completed accept_application")

    return j
def update_request_for_journal(self, journal_id, account=None, lock_timeout=None):
    """
    Obtain an update request application object for the journal with the given journal_id

    An update request may either be loaded from the database, if it already exists, or created
    in-memory if it has not previously been created.

    If an account is provided, this will validate that the account holder is allowed to make
    the conversion from journal to application, if a conversion is required.

    When this request runs, the journal will be locked to the provided account if an account is
    given.  If the application is loaded from the database, this will also be locked for the account
    holder.

    :param journal_id: the id of the journal to obtain an update request for
    :param account: the account making the request (may be None, in which case nothing is locked)
    :param lock_timeout: how long to hold any locks for; passed through to lock.lock, may be None
    :return: a tuple of (Application Object, Journal Lock, Application Lock); all three are None if
        the journal does not exist or is not in_doaj
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("update_request_for_journal", [
        {"arg": journal_id, "instance": basestring, "allow_none": False, "arg_name": "journal_id"},
        {"arg": account, "instance": models.Account, "allow_none": True, "arg_name": "account"},
        {"arg": lock_timeout, "instance": int, "allow_none": True, "arg_name": "lock_timeout"}
    ], exceptions.ArgumentException)

    # logger.debug() performs its own level check, so no explicit isEnabledFor guard is needed
    app.logger.debug("Entering update_request_for_journal")

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # first retrieve the journal, and return empty if there isn't one.
    # We don't attempt to obtain a lock at this stage, as we want to check that the user is authorised first
    journal_lock = None
    journal, _ = journalService.journal(journal_id)
    if journal is None:
        app.logger.info("Request for journal {x} did not find anything in the database".format(x=journal_id))
        return None, None, None

    # if the journal is not in_doaj, we won't create an update request for it
    if not journal.is_in_doaj():
        app.logger.info("Request for journal {x} found it is not in_doaj; will not create update request".format(x=journal_id))
        return None, None, None

    # retrieve the latest application attached to this journal
    application_lock = None
    application = models.Suggestion.find_latest_by_current_journal(journal_id)

    # if no such application exists, create one in memory (this will check that the user is permitted to create one)
    # at the same time, create the lock for the journal.  This will throw an AuthorisedException or a Locked exception
    # (in that order of preference) if any problems arise.
    if application is None:
        app.logger.info("No existing update request for journal {x}; creating one".format(x=journal.id))
        application = journalService.journal_2_application(journal, account=account)
        lra_id = journal.latest_related_application_id()
        if lra_id is not None:
            lra, _ = self.application(lra_id)
            if lra is not None:
                self.patch_application(application, lra)
        if account is not None:
            journal_lock = lock.lock("journal", journal_id, account.id, lock_timeout)

    # otherwise check that the user (if given) has the rights to edit the application
    # then lock the application and journal to the account.
    # If a lock cannot be obtained, unlock the journal and application before we return
    elif account is not None:
        try:
            authService.can_edit_application(account, application)
            application_lock = lock.lock("suggestion", application.id, account.id, lock_timeout)
            journal_lock = lock.lock("journal", journal_id, account.id, lock_timeout)
        except lock.Locked:
            # release whichever lock we did manage to obtain before passing the error on
            if application_lock is not None:
                application_lock.delete()
            if journal_lock is not None:
                journal_lock.delete()
            raise
        except exceptions.AuthoriseException as e:
            msg = "Account {x} is not permitted to edit the current update request on journal {y}".format(x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

        app.logger.info("Using existing application {y} as update request for journal {x}".format(y=application.id, x=journal.id))

    app.logger.debug("Completed update_request_for_journal; return application object")

    return application, journal_lock, application_lock
def accept_application(self, application, account, manual_update=True, provenance=True,
                       save_journal=True, save_application=True):
    """
    Take the given application and create the Journal object in DOAJ for it.

    The account provided must hold the "accept_application" role.

    :param application: The application (models.Suggestion) to be converted
    :param account: The account (models.Account) doing the conversion
    :param manual_update: Whether to record this update as a manual update on both the application and
        journal objects
    :param provenance: Whether to write a provenance record for this operation
    :param save_journal: Whether to save the journal that is produced
    :param save_application: Whether to save the application after it has been modified
    :return: The journal that was created
    :raises exceptions.AuthoriseException: if the account does not have the "accept_application" role
    :raises exceptions.SaveException: if save_journal is set and saving the journal returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # (argvalidate is handed ArgumentException - presumably what it raises on a bad argument)
    argvalidate("accept_application", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"},
        {"arg": provenance, "instance": bool, "allow_none": False, "arg_name": "provenance"},
        {"arg": save_journal, "instance": bool, "allow_none": False, "arg_name": "save_journal"},
        {"arg": save_application, "instance": bool, "allow_none": False, "arg_name": "save_application"}
    ], exceptions.ArgumentException)

    # Logger.debug() already performs the level check, so no isEnabledFor() guard is needed.
    # (The previous guard passed the string "debug", which is not a valid numeric level.)
    app.logger.debug("Entering accept_application")

    # ensure that the account holder has a suitable role
    if not account.has_role("accept_application"):
        raise exceptions.AuthoriseException(
            message="User {x} is not permitted to accept application {y}".format(x=account.id, y=application.id),
            reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "accepted"
    if application.application_status != constants.APPLICATION_STATUS_ACCEPTED:
        application.set_application_status(constants.APPLICATION_STATUS_ACCEPTED)

    # make the resulting journal (and save it if requested)
    j = self.application_2_journal(application, manual_update=manual_update)
    if save_journal is True:
        saved = j.save()
        if saved is None:
            raise exceptions.SaveException("Save of resulting journal in accept_application failed")

    # if there is a current_journal record on the application, remove it
    if application.current_journal is not None:
        application.remove_current_journal()

    # set the relationship with the journal
    application.set_related_journal(j.id)

    # if we were asked to record this as a manual update, record that on the application
    # (the journal is done implicitly above, via application_2_journal)
    if manual_update:
        application.set_last_manual_update()

    if provenance:
        # record the event in the provenance tracker
        models.Provenance.make(account, constants.PROVENANCE_STATUS_ACCEPTED, application)

    # save the application if requested
    # NOTE(review): unlike the journal save above, the result of this save is not checked
    if save_application is True:
        application.save()

    app.logger.debug("Completed accept_application")

    return j
def issn_ownership_status(self, article, owner):
    """
    Determine the ownership status of the supplied owner over the issns in the given article

    This will give you a tuple back which lists the following (in order):

    * which issns are owned by that owner (the only owner recorded for the issn)
    * which issns are shared with another owner (several owners recorded, this one among them)
    * which issns are not owned by this owner (including issns whose journals record no owner)
    * which issns are not found in the DOAJ database (no journal returned by Journal.find_by_issn)

    :param article: the models.Article whose print and electronic issns are examined
    :param owner: the owner id to check against
    :return: Tuple of lists (owned, shared, unowned, unmatched)
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # NOTE(review): `unicode` only exists as a builtin on Python 2 - confirm how this file provides it
    argvalidate("issn_ownership_status", [
        {"arg": article, "instance" : models.Article, "allow_none" : False, "arg_name" : "article"},
        {"arg" : owner, "instance" : unicode, "allow_none" : False, "arg_name" : "owner"}
    ], exceptions.ArgumentException)

    # get all the issns for the article
    b = article.bibjson()
    issns = b.get_identifiers(b.P_ISSN)
    issns += b.get_identifiers(b.E_ISSN)

    # check each issn against the index, and if a related journal is found
    # record the (non-None) owner of that journal against the issn
    owners_by_issn = {}
    for issn in issns:
        journals = models.Journal.find_by_issn(issn)
        if journals is None or len(journals) == 0:
            continue
        issn_owners = owners_by_issn.setdefault(issn, set())
        for j in journals:
            if j.owner is not None:
                issn_owners.add(j.owner)

    # any issn for which no journal was found at all
    unmatched = [issn for issn in issns if issn not in owners_by_issn]

    owned = []
    shared = []
    unowned = []
    # .items() rather than .iteritems() so this works on both Python 2 and Python 3
    for issn, issn_owners in owners_by_issn.items():
        if owner not in issn_owners:
            # covers "no owners recorded" as well as "owned only by others"
            unowned.append(issn)
        elif len(issn_owners) == 1:
            owned.append(issn)
        else:
            shared.append(issn)

    return owned, shared, unowned, unmatched
def create_article(self, article, account, duplicate_check=True, merge_duplicate=True,
                   limit_to_account=True, add_journal_info=False, dry_run=False):
    """
    Create a single article in the database.

    Duplicates are detected and (optionally) merged, and the outcome is reported as a
    dictionary of counters and issn collections with the keys "success", "fail", "update",
    "new", "shared", "unowned" and "unmatched".

    :param article: The article to be created
    :param account: The account creating the article
    :param duplicate_check: Whether to look for a duplicate of the article before saving
    :param merge_duplicate: Whether to merge into a duplicate if one is found. If False and a duplicate
        is found, a DuplicateArticleException is raised
    :param limit_to_account: Whether to refuse the create when the account is not the legitimate owner
        of the article (as decided by is_legitimate_owner)
    :param add_journal_info: Whether to attach the journal metadata to the article before saving
    :param dry_run: If True the article is not saved; everything else still runs
    :return: the report dictionary described above
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    validation_rules = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": duplicate_check, "instance": bool, "allow_none": False, "arg_name": "duplicate_check"},
        {"arg": merge_duplicate, "instance": bool, "allow_none": False, "arg_name": "merge_duplicate"},
        {"arg": limit_to_account, "instance": bool, "allow_none": False, "arg_name": "limit_to_account"},
        {"arg": add_journal_info, "instance": bool, "allow_none": False, "arg_name": "add_journal_info"},
        {"arg": dry_run, "instance": bool, "allow_none": False, "arg_name": "dry_run"},
    ]
    argvalidate("create_article", validation_rules, exceptions.ArgumentException)

    # ownership gate: report a failure (with the issn breakdown) rather than raising
    if limit_to_account and not self.is_legitimate_owner(article, account.id):
        _owned, shared, unowned, unmatched = self.issn_ownership_status(article, account.id)
        return {
            "success": 0,
            "fail": 1,
            "update": 0,
            "new": 0,
            "shared": shared,
            "unowned": unowned,
            "unmatched": unmatched
        }

    # work out whether this save creates a new article or updates an existing one
    update_count = 0
    if duplicate_check:
        existing = self.get_duplicate(article, account.id)
        if existing is not None:
            if not merge_duplicate:
                raise exceptions.DuplicateArticleException()
            # merge will take the old id, so the save below overwrites the existing record
            article.merge(existing)
            update_count = 1

    if add_journal_info:
        article.add_journal_metadata()

    # finally, save the new article (unless we were only asked to rehearse)
    if not dry_run:
        article.save()

    return {
        "success": 1,
        "fail": 0,
        "update": update_count,
        "new": 1 - update_count,
        "shared": set(),
        "unowned": set(),
        "unmatched": set()
    }
def journal_2_application(self, journal, account=None, keep_editors=False):
    """
    Function to convert a given journal into an application object.

    Provide the journal, and it will be converted
    in-memory to the application object (currently a Suggestion). The new application
    WILL NOT be saved by this method.

    If an account is provided, this will validate that the account holder is
    allowed to make this conversion

    :param journal: a journal to convert
    :param account: an account doing the action - optional, if specified the application will only be
        created if the account is allowed to
    :param keep_editors: if True, copy the journal's editor and editor_group (where set) onto the application
    :return: Suggestion object
    :raises exceptions.AuthoriseException: re-raised from the authorisation service if the account is not
        permitted; its `message` is replaced with a description naming the account and journal
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("journal_2_application", [
        {"arg": journal, "instance" : models.Journal, "allow_none" : False, "arg_name" : "journal"},
        {"arg" : account, "instance" : models.Account, "arg_name" : "account"}
    ], exceptions.ArgumentException)

    # Logger.debug() already performs the level check, so no isEnabledFor() guard is needed.
    # (The previous guard passed the string "debug", which is not a valid numeric level.)
    app.logger.debug("Entering journal_2_application")

    authService = DOAJ.authorisationService()

    # if an account is specified, check that it is allowed to perform this action
    if account is not None:
        try:
            authService.can_create_update_request(account, journal)    # throws exception if not allowed
        except exceptions.AuthoriseException as e:
            msg = "Account {x} is not permitted to create an update request on journal {y}".format(x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

    # copy all the relevant information from the journal to the application
    bj = journal.bibjson()
    contacts = journal.contacts()
    notes = journal.notes
    first_contact = None

    application = models.Suggestion()
    application.set_application_status(constants.APPLICATION_STATUS_UPDATE_REQUEST)
    for c in contacts:
        application.add_contact(c.get("name"), c.get("email"))
        # remember the first contact so it can be recorded as the suggester below
        if first_contact is None:
            first_contact = c
    application.set_current_journal(journal.id)
    if keep_editors is True:
        if journal.editor is not None:
            application.set_editor(journal.editor)
        if journal.editor_group is not None:
            application.set_editor_group(journal.editor_group)
    for n in notes:
        application.add_note(n.get("note"), n.get("date"))
    application.set_owner(journal.owner)
    application.set_seal(journal.has_seal())
    application.set_bibjson(bj)
    if first_contact is not None:
        application.set_suggester(first_contact.get("name"), first_contact.get("email"))
    application.suggested_on = dates.now()

    app.logger.debug("Completed journal_2_application; return application object")
    return application
def issn_ownership_status(self, article, owner):
    """
    Determine the ownership status of the supplied owner over the issns in the given article

    This will give you a tuple back which lists the following (in order):

    * which issns are owned by that owner (the only owner recorded for the issn)
    * which issns are shared with another owner (several owners recorded, this one among them)
    * which issns are not owned by this owner (including issns whose journals record no owner)
    * which issns are not found in the DOAJ database (no journal returned by Journal.find_by_issn)

    :param article: the models.Article whose print and electronic issns are examined
    :param owner: the owner id to check against
    :return: Tuple of lists (owned, shared, unowned, unmatched)
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # NOTE(review): `unicode` only exists as a builtin on Python 2 - confirm how this file provides it
    argvalidate("issn_ownership_status", [{
        "arg": article,
        "instance": models.Article,
        "allow_none": False,
        "arg_name": "article"
    }, {
        "arg": owner,
        "instance": unicode,
        "allow_none": False,
        "arg_name": "owner"
    }], exceptions.ArgumentException)

    # get all the issns for the article
    b = article.bibjson()
    issns = b.get_identifiers(b.P_ISSN)
    issns += b.get_identifiers(b.E_ISSN)

    # check each issn against the index, and if a related journal is found
    # record the (non-None) owner of that journal against the issn
    seen_issns = {}
    for issn in issns:
        journals = models.Journal.find_by_issn(issn)
        if journals is None or len(journals) == 0:
            continue
        recorded = seen_issns.setdefault(issn, set())
        for j in journals:
            if j.owner is not None:
                recorded.add(j.owner)

    # any issn for which no journal was found at all
    unmatched = [issn for issn in issns if issn not in seen_issns]

    owned = []
    shared = []
    unowned = []
    # .items() rather than .iteritems() so this works on both Python 2 and Python 3
    for issn, recorded in seen_issns.items():
        if owner not in recorded:
            # covers "no owners recorded" as well as "owned only by others"
            unowned.append(issn)
        elif len(recorded) == 1:
            owned.append(issn)
        else:
            shared.append(issn)

    return owned, shared, unowned, unmatched
def reject_application(self, application, account, provenance=True, note=None, manual_update=True):
    """
    Reject an application. This will:

    * set the application status to "rejected" (if not already)
    * add the supplied note to the application (if given)
    * remove the current_journal field, and move it to related_journal (if the journal can be retrieved)
    * remove the current_application field from that journal, and save the journal (if needed)
    * save the application
    * write a provenance record for the rejection (if requested)

    :param application: the application (models.Suggestion) to reject
    :param account: the account (models.Account) doing the rejecting; must hold the "reject_application" role
    :param provenance: whether to write a provenance record for this operation
    :param note: optional text to add to the application as a note
    :param manual_update: whether to record this as a manual update on the application
    :return: None
    :raises exceptions.AuthoriseException: if the account does not have the "reject_application" role
    :raises exceptions.SaveException: if saving the current journal or the application returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # NOTE(review): `basestring` only exists as a builtin on Python 2 - confirm how this file provides it
    argvalidate("reject_application", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": provenance, "instance": bool, "allow_none": False, "arg_name": "provenance"},
        {"arg": note, "instance": basestring, "allow_none": True, "arg_name": "note"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"}
    ], exceptions.ArgumentException)

    # Logger.debug() already performs the level check, so no isEnabledFor() guard is needed.
    # (The previous guard passed the string "debug", which is not a valid numeric level.)
    app.logger.debug("Entering reject_application")

    journalService = DOAJ.journalService()

    # check we're allowed to carry out this action
    if not account.has_role("reject_application"):
        raise exceptions.AuthoriseException(
            message="This user is not allowed to reject applications",
            reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "rejected"
    if application.application_status != constants.APPLICATION_STATUS_REJECTED:
        application.set_application_status(constants.APPLICATION_STATUS_REJECTED)

    # add the note to the application
    if note is not None:
        application.add_note(note)

    # if there is a current_journal record, remove it, and record
    # it as a related journal.  This will let us come back later and know
    # which journal record this was intended as an update against if needed.
    cj_id = application.current_journal
    if cj_id is not None:
        cj, _ = journalService.journal(cj_id)
        application.remove_current_journal()
        if cj is not None:
            application.set_related_journal(cj_id)
            cj.remove_current_application()

            # the journal has just been modified, so save it
            saved = cj.save()
            if saved is None:
                raise exceptions.SaveException("Save on current_journal in reject_application failed")

    # if we were asked to record this as a manual update, record that on the application
    if manual_update:
        application.set_last_manual_update()

    saved = application.save()
    if saved is None:
        raise exceptions.SaveException("Save on application in reject_application failed")

    # record a provenance record that this action took place
    if provenance:
        models.Provenance.make(account, constants.PROVENANCE_STATUS_REJECTED, application)

    app.logger.debug("Completed reject_application")
def csv(self, prune=True):
    """
    Generate the Journal CSV

    The file is written to the temporary store, copied into the "cache" store, and its URL is
    recorded via models.Cache.cache_csv. The temporary copy is deleted afterwards, whether or not
    the copy to the cache store succeeded.

    :param prune: whether to prune older journal CSV files from the cache container once the new file
        has been stored (prune_container is called with keep=2, files ordered newest first)
    :return: Tuple of (URL of the stored file, action register returned by prune_container - an empty
        list when prune is falsy)
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("csv", [
        {"arg": prune, "allow_none": False, "arg_name": "prune"}
    ], exceptions.ArgumentException)

    filename = 'journalcsv__doaj_' + datetime.utcnow().strftime('%Y%m%d_%H%M') + '_utf8.csv'
    container_id = app.config.get("STORE_CACHE_CONTAINER")
    tmpStore = StoreFactory.tmp()
    out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)

    YES_NO = {True: 'Yes', False: 'No', None: '', '': ''}

    def _make_journals_csv(file_object):
        """
        Make a CSV file of information for all journals.
        :param file_object: a utf8 encoded file object.
        """
        cols = {}
        for j in models.Journal.all_in_doaj(page_size=100000):  # 10x how many journals we have right now
            assert isinstance(j, models.Journal)  # for pycharm type inspection
            bj = j.bibjson()
            # key each journal by its print issn, falling back to the electronic issn
            issn = bj.get_one_identifier(idtype=bj.P_ISSN)
            if issn is None:
                issn = bj.get_one_identifier(idtype=bj.E_ISSN)
            if issn is None:
                # journals with no issn at all are left out of the CSV
                continue

            kvs = Journal2QuestionXwalk.journal2question(j)
            meta_kvs = _get_doaj_meta_kvs(j)
            article_kvs = _get_article_kvs(j)
            cols[issn] = kvs + meta_kvs + article_kvs

        csvwriter = csv.writer(file_object)
        header_written = False
        for issn in sorted(cols):
            row = cols[issn]
            if not header_written:
                # the header row is taken from the keys of the first journal written
                csvwriter.writerow([q for q, _ in row])
                header_written = True
            csvwriter.writerow([v for _, v in row])

    def _get_doaj_meta_kvs(journal):
        """
        Get key, value pairs for some meta information we want from the journal object
        :param journal: a models.Journal
        :return: a list of (key, value) tuples for our metadata
        """
        return [
            ("DOAJ Seal", YES_NO.get(journal.has_seal(), "")),
            ("Tick: Accepted after March 2014", YES_NO.get(journal.is_ticked(), "")),
            ("Added on Date", journal.created_date),
            ("Subjects", ' | '.join(journal.bibjson().lcc_paths()))
        ]

    def _get_article_kvs(journal):
        """
        Get key, value pairs describing the journal's articles, from journal.article_stats()
        :param journal: a models.Journal
        :return: a list of (key, value) tuples
        """
        stats = journal.article_stats()
        return [
            ("Number of Article Records", str(stats.get("total"))),
            ("Most Recent Article Added", stats.get("latest"))
        ]

    # newline='' is what the csv module requires of files it writes to; without it the
    # platform newline translation can corrupt row terminators and embedded newlines
    with open(out, 'w', encoding='utf-8', newline='') as csvfile:
        _make_journals_csv(csvfile)

    mainStore = StoreFactory.get("cache")
    try:
        mainStore.store(container_id, filename, source_path=out)
        url = mainStore.url(container_id, filename)
    finally:
        # don't delete the container, just in case someone else is writing to it
        tmpStore.delete_file(container_id, filename)

    action_register = []
    if prune:
        # helper names avoid shadowing the builtins; prune_container still receives them
        # through the same positional / `filter` keyword arguments as before
        def _sort_newest_first(filelist):
            rx = "journalcsv__doaj_(.+?)_utf8.csv"
            return sorted(
                filelist,
                key=lambda x: datetime.strptime(re.match(rx, x).group(1), '%Y%m%d_%H%M'),
                reverse=True)

        def _is_journal_csv(filename):
            return filename.startswith("journalcsv__")

        action_register = prune_container(mainStore, container_id, _sort_newest_first,
                                          filter=_is_journal_csv, keep=2)

    # update the ES record to point to the new file
    models.Cache.cache_csv(url)
    return url, action_register
def journal_2_application(self, journal, account=None, keep_editors=False):
    """
    Build an in-memory application (currently a Suggestion) from a journal.

    The application produced here is NOT saved by this method.

    When an account is supplied, the authorisation service is asked whether that account may
    create an update request for the journal; if not, its AuthoriseException is re-raised with
    an explanatory message appended to the exception's args.

    :param journal: a journal to convert
    :param account: an account doing the action - optional, if specified the application will only be
        created if the account is allowed to
    :param keep_editors: if True, carry the journal's editor and editor_group (where set) over to the
        application
    :return: Suggestion object
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("journal_2_application", [
        {"arg": journal, "instance": models.Journal, "allow_none": False, "arg_name": "journal"},
        {"arg": account, "instance": models.Account, "arg_name": "account"},
    ], exceptions.ArgumentException)

    if app.logger.isEnabledFor(logging.DEBUG):
        app.logger.debug("Entering journal_2_application")

    auth_service = DOAJ.authorisationService()

    # if an account is specified, check that it is allowed to perform this action
    if account is not None:
        try:
            # throws exception if not allowed
            auth_service.can_create_update_request(account, journal)
        except exceptions.AuthoriseException as denied:
            msg = "Account {x} is not permitted to create an update request on journal {y}".format(
                x=account.id, y=journal.id)
            app.logger.info(msg)
            denied.args += (msg, )
            raise

    # gather everything we need from the journal up front
    source_bibjson = journal.bibjson()
    source_contacts = journal.contacts()
    source_notes = journal.notes

    update_request = models.Suggestion()
    update_request.set_application_status(constants.APPLICATION_STATUS_UPDATE_REQUEST)

    # copy every contact across; the first one doubles as the suggester later on
    suggester = None
    for contact in source_contacts:
        update_request.add_contact(contact.get("name"), contact.get("email"))
        if suggester is None:
            suggester = contact

    update_request.set_current_journal(journal.id)

    if keep_editors is True:
        if journal.editor is not None:
            update_request.set_editor(journal.editor)
        if journal.editor_group is not None:
            update_request.set_editor_group(journal.editor_group)

    for entry in source_notes:
        update_request.add_note(entry.get("note"), entry.get("date"))

    update_request.set_owner(journal.owner)
    update_request.set_seal(journal.has_seal())
    update_request.set_bibjson(source_bibjson)

    if suggester is not None:
        update_request.set_suggester(suggester.get("name"), suggester.get("email"))
    update_request.suggested_on = dates.now()

    if app.logger.isEnabledFor(logging.DEBUG):
        app.logger.debug(
            "Completed journal_2_application; return application object")
    return update_request
def is_legitimate_owner(self, article, owner):
    """
    Decide whether the given owner id is the sole owner of the journal(s) the article's issns
    belong to.

    The answer is False when no journal is found for any issn, when the journals found have more
    than one distinct owner, or when the single owner's journals carry exactly two distinct issns
    and one of the article's issns is not among them.

    :param article: the article whose print and electronic issns are looked up
    :param owner: the owner id to test
    :return: True if the single owner found equals the supplied owner, otherwise False
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("is_legitimate_owner", [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": owner, "instance": unicode, "allow_none": False, "arg_name": "owner"},
    ], exceptions.ArgumentException)

    # get all the issns for the article
    bibjson = article.bibjson()
    issns = bibjson.get_identifiers(bibjson.P_ISSN)
    issns += bibjson.get_identifiers(bibjson.E_ISSN)

    # look each issn up in the index, collecting for every journal owner the issns
    # that their matching journals declare
    issns_by_owner = {}
    for issn in issns:
        matches = models.Journal.find_by_issn(issn)
        if matches is None or len(matches) == 0:
            continue
        for found in matches:
            issns_by_owner.setdefault(found.owner, [])
            issns_by_owner[found.owner] += found.bibjson().issns()

    # zero owners means we can't confirm; several owners means ownership is confused
    if len(issns_by_owner) != 1:
        return False

    sole_owner = list(issns_by_owner)[0]

    # single owner must still know of all supplied issns
    known_issns = set(issns_by_owner[sole_owner])
    # we only want to check issn parity for journals where there is more than one issn available.
    if len(known_issns) == 2:
        if any(issn not in known_issns for issn in issns):
            return False

    # true if the found owner is the same as the desired owner, otherwise false
    return sole_owner == owner