def mock(*args, **kwargs):
    """
    Stand-in callable whose outcome is selected by the free name ``result``.

    All arguments are accepted and ignored.  ``result``, ``duplicate_result``,
    ``update_article_id`` and ``exceptions`` are not defined in this block;
    presumably they come from an enclosing scope - TODO confirm.

    :return: 1 when ``result`` is neither -1 nor 0
    :raises exceptions.ConfigurationException: when ``result`` equals -1
    :raises exceptions.DuplicateArticleException: when ``result`` equals 0
    """
    # Trace output of the values this stand-in was configured with.
    print(duplicate_result, update_article_id)

    if result == -1:
        raise exceptions.ConfigurationException
    elif result == 0:
        raise exceptions.DuplicateArticleException("duplicate result is 'different'")

    return 1
def _prepare_update_publisher(self, article, duplicate, merge_duplicate, account, limit_to_account): # before saving, we need to determine whether this is a new article # or an update is_update = 0 if duplicate is not None: # else -> it is new article # check if can update the duplicate - if is the owner has_permissions_result = self.has_permissions(account, article, limit_to_account) if isinstance(has_permissions_result, bool) and has_permissions_result == True: doi_or_ft_updated = self._doi_or_fulltext_updated(article, duplicate.id) if doi_or_ft_updated or not merge_duplicate: raise exceptions.DuplicateArticleException() else: is_update += 1 article.merge(duplicate) else: raise exceptions.DuplicateArticleException() return is_update
def discover_duplicates(self, article, results_per_match_type=10, include_article=True):
    """
    Find possible duplicates of an article, grouped by the criterion that matched.

    Candidates are looked up by normalised DOI first and then by normalised
    fulltext URL, each through ``models.Article.duplicates``.

    :param article: the ``models.Article`` to look for duplicates of (must not be None)
    :param results_per_match_type: passed as ``size`` to each duplicate lookup
    :param include_article: when False, candidates whose ``id`` equals ``article.id``
        are removed from the results
    :return: a dict that may contain the keys ``'doi'`` and ``'fulltext'``, each mapped to
        a list of candidates, or None if no criterion produced a non-empty list.  A key
        may map to an empty list when filtering removed every candidate for that
        criterion while the other criterion still matched.
    :raises exceptions.DuplicateArticleException: when the article has neither a
        normalised DOI nor a normalised fulltext URL
    """
    # argvalidate is handed exceptions.ArgumentException; presumably that is
    # what it raises on a bad argument - confirm against argvalidate.
    argvalidate("discover_duplicates", [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
    ], exceptions.ArgumentException)

    def _select(candidates):
        # Optionally drop the article itself from its own list of matches.
        if include_article:
            return list(candidates)
        return [c for c in candidates if c.id != article.id]

    matches = {}

    # First criterion: DOI.  Only a non-empty string is worth querying.
    doi = article.get_normalised_doi()
    if isinstance(doi, str) and doi != '':
        by_doi = models.Article.duplicates(doi=doi, size=results_per_match_type)
        if len(by_doi) > 0:
            matches['doi'] = _select(by_doi)

    # Second criterion: fulltext URL.
    fulltext = article.get_normalised_fulltext()
    if fulltext is not None:
        by_fulltext = models.Article.duplicates(fulltexts=fulltext, size=results_per_match_type)
        if len(by_fulltext) > 0:
            matches['fulltext'] = _select(by_fulltext)

    # Without either identifier there was nothing to match on at all.
    if doi is None and fulltext is None:
        raise exceptions.DuplicateArticleException(Messages.EXCEPTION_DETECT_DUPLICATE_NO_ID)

    # Report the matches only if at least one criterion kept a candidate.
    if any(len(found_for_type) > 0 for found_for_type in matches.values()):
        return matches
    return None
def _prepare_update_admin(self, article, duplicate, update_article_id, merge_duplicate): is_update = 0 if duplicate is not None: if duplicate.id != update_article_id: # it means that doi or ft url has been changed so that it duplicates existing article raise exceptions.DuplicateArticleException() elif merge_duplicate: is_update += 1 article.merge(duplicate) elif merge_duplicate: # requested to update article has both url and doi changed to new values - no duplicate detected is_update += 1 art = models.Article.pull(update_article_id) article.merge(art) return is_update
def create_article(self, article, account, duplicate_check=True, merge_duplicate=True,
                   limit_to_account=True, add_journal_info=False, dry_run=False):
    """
    Create a single article, optionally merging it with an existing duplicate.

    The result is reported as a dict of counters and ISSN sets; per the original
    documentation this shape is consistent with batch_create_articles.

    :param article: The article to be created
    :param account: The account creating the article
    :param duplicate_check: Whether to check for duplicates in the database
    :param merge_duplicate: Whether to merge a duplicate if one is found. If False and a
        duplicate is found, a DuplicateArticleException is raised
    :param limit_to_account: Whether to limit create to when the account owns the journal
        to which the article belongs
    :param add_journal_info: Whether to call ``article.add_journal_metadata()`` before saving
    :param dry_run: When True the article is not saved; everything else still runs
    :return: dict with keys "success", "fail", "update", "new" (0/1 counters) and
        "shared", "unowned", "unmatched".  On an ownership failure the last three come from
        ``self.issn_ownership_status``; on success they are empty sets.
    :raises exceptions.DuplicateArticleException: when a duplicate is found and
        ``merge_duplicate`` is False
    """
    # Validate the incoming arguments: two model instances followed by five
    # boolean switches, none of which may be None.
    specs = [
        {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
    ]
    switches = (
        ("duplicate_check", duplicate_check),
        ("merge_duplicate", merge_duplicate),
        ("limit_to_account", limit_to_account),
        ("add_journal_info", add_journal_info),
        ("dry_run", dry_run),
    )
    for switch_name, switch_value in switches:
        specs.append({"arg": switch_value, "instance": bool, "allow_none": False, "arg_name": switch_name})
    argvalidate("create_article", specs, exceptions.ArgumentException)

    # Ownership gate: report a failure (rather than raise) when the account is
    # not a legitimate owner of the article.
    if limit_to_account and not self.is_legitimate_owner(article, account.id):
        _owned, shared, unowned, unmatched = self.issn_ownership_status(article, account.id)
        return {
            "success": 0,
            "fail": 1,
            "update": 0,
            "new": 0,
            "shared": shared,
            "unowned": unowned,
            "unmatched": unmatched,
        }

    # Work out whether this is a new article or an update of an existing one.
    updated = 0
    if duplicate_check:
        existing = self.get_duplicate(article, account.id)
        if existing is not None:
            if not merge_duplicate:
                raise exceptions.DuplicateArticleException()
            # merge will take the old id, so the save below overwrites the old record
            article.merge(existing)
            updated = 1

    if add_journal_info:
        article.add_journal_metadata()

    # Finally persist the article, unless this is only a dry run.
    if not dry_run:
        article.save()

    return {
        "success": 1,
        "fail": 0,
        "update": updated,
        "new": 1 - updated,
        "shared": set(),
        "unowned": set(),
        "unmatched": set(),
    }
def mock(*args, **kwargs):
    """
    Stand-in callable that hands back the free name ``result``.

    All arguments are accepted and ignored.  ``result`` and ``exceptions`` are
    not defined in this block; presumably they come from an enclosing scope -
    TODO confirm.

    :return: ``result`` unchanged, whenever it is not -1
    :raises exceptions.DuplicateArticleException: when ``result`` equals -1
    """
    # -1 is the sentinel that makes this stand-in fail instead of returning.
    if result == -1:
        raise exceptions.DuplicateArticleException()
    return result