Beispiel #1
0
 def mock(*args, **kwargs):
     """
     Stand-in callable that ignores its arguments and acts on the free variable ``result``.

     ``result``, ``duplicate_result`` and ``update_article_id`` are not defined here;
     they must be supplied by the surrounding scope.

     :return: 1 when ``result`` is neither -1 nor 0
     :raises exceptions.ConfigurationException: when ``result == -1``
     :raises exceptions.DuplicateArticleException: when ``result == 0``
     """
     # trace output showing which scenario this stub is being driven with
     print(duplicate_result, update_article_id)

     if result == -1:
         raise exceptions.ConfigurationException
     elif result == 0:
         raise exceptions.DuplicateArticleException("duplicate result is 'different'")
     else:
         return 1
Beispiel #2
0
    def _prepare_update_publisher(self, article, duplicate, merge_duplicate, account, limit_to_account):
        # before saving, we need to determine whether this is a new article
        # or an update
        is_update = 0

        if duplicate is not None:  # else -> it is new article
            # check if can update the duplicate - if is the owner
            has_permissions_result = self.has_permissions(account, article, limit_to_account)
            if isinstance(has_permissions_result, bool) and has_permissions_result == True:
                doi_or_ft_updated = self._doi_or_fulltext_updated(article, duplicate.id)
                if doi_or_ft_updated or not merge_duplicate:
                    raise exceptions.DuplicateArticleException()
                else:
                    is_update += 1
                    article.merge(duplicate)
            else:
                raise exceptions.DuplicateArticleException()
        return is_update
Beispiel #3
0
    def discover_duplicates(self, article, results_per_match_type=10, include_article=True):
        """
        Find existing articles matching this one, grouped by the criterion that matched.

        Two lookups are made through ``models.Article.duplicates``: one by normalised DOI
        (only when it is a non-empty string) and one by normalised fulltext URL (whenever
        it is not None).

        :param article: the ``models.Article`` to find duplicates of
        :param results_per_match_type: passed as ``size`` to each lookup
        :param include_article: when False, matches whose ``id`` equals ``article.id`` are dropped
        :return: a dict with optional keys ``"doi"`` and ``"fulltext"``, each mapping to a list
            of matches, or None when neither criterion yielded a non-empty list.  When a dict
            is returned, one of the keys may still map to an empty list.
        :raises exceptions.DuplicateArticleException: when both the normalised DOI and the
            normalised fulltext are None
        """
        # validate the incoming arguments before doing any lookups
        # (exceptions.ArgumentException is handed to argvalidate as the error type to use)
        argvalidate("discover_duplicates", [
            {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
        ], exceptions.ArgumentException)

        def _select(candidates):
            # optionally remove the article itself from its own list of matches
            if include_article:
                return list(candidates)
            return [candidate for candidate in candidates if candidate.id != article.id]

        # every criterion that returned hits gets an entry here, keyed by criterion name
        matches_by_type = {}

        # criterion 1: DOI (a None, non-string or empty DOI skips the lookup)
        doi = article.get_normalised_doi()
        if isinstance(doi, str) and doi != '':
            doi_hits = models.Article.duplicates(doi=doi, size=results_per_match_type)
            if len(doi_hits) > 0:
                matches_by_type['doi'] = _select(doi_hits)

        # criterion 2: fulltext url
        fulltext = article.get_normalised_fulltext()
        if fulltext is not None:
            fulltext_hits = models.Article.duplicates(fulltexts=fulltext, size=results_per_match_type)
            if len(fulltext_hits) > 0:
                matches_by_type['fulltext'] = _select(fulltext_hits)

        # with neither identifier available there is nothing to detect duplicates by
        if doi is None and fulltext is None:
            raise exceptions.DuplicateArticleException(Messages.EXCEPTION_DETECT_DUPLICATE_NO_ID)

        # a criterion only counts as a hit if something survived the filtering
        if any(matches_by_type.values()):
            return matches_by_type
        return None
Beispiel #4
0
    def _prepare_update_admin(self, article, duplicate, update_article_id, merge_duplicate):

        is_update = 0
        if duplicate is not None:
            if duplicate.id != update_article_id:
                # it means that doi or ft url has been changed so that it duplicates existing article
                raise exceptions.DuplicateArticleException()
            elif merge_duplicate:
                is_update += 1
                article.merge(duplicate)
        elif merge_duplicate:  # requested to update article has both url and doi changed to new values - no duplicate detected
            is_update += 1
            art = models.Article.pull(update_article_id)
            article.merge(art)

        return is_update
Beispiel #5
0
    def create_article(self,
                       article,
                       account,
                       duplicate_check=True,
                       merge_duplicate=True,
                       limit_to_account=True,
                       add_journal_info=False,
                       dry_run=False):
        """
        Create a single article in the database

        Duplicates are optionally detected and merged, and the outcome is reported as a
        dict of counters and ISSN sets, in a manner consistent with batch_create_articles.

        :param article: The article to be created
        :param account:     The account creating the article
        :param duplicate_check:     Whether to check for duplicates in the database
        :param merge_duplicate:     Whether to merge a duplicate if one is found.  If False and a duplicate is found, a DuplicateArticleException is raised
        :param limit_to_account:    Whether to limit create to when the account owns the journal to which the article belongs
        :param add_journal_info:    Should we fetch the journal info and attach it to the article before save?
        :param dry_run:     Whether to actually save, or if this is just to either see if it would work, or to prep for a batch ingest
        :return: dict with keys "success", "fail", "update", "new" (0/1 counters) and "shared", "unowned", "unmatched"
        """
        # the two model-typed arguments, followed by the boolean flags which all share one rule shape
        validation_rules = [
            {"arg": article, "instance": models.Article, "allow_none": False, "arg_name": "article"},
            {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        ]
        boolean_flags = (
            ("duplicate_check", duplicate_check),
            ("merge_duplicate", merge_duplicate),
            ("limit_to_account", limit_to_account),
            ("add_journal_info", add_journal_info),
            ("dry_run", dry_run),
        )
        validation_rules.extend(
            {"arg": flag_value, "instance": bool, "allow_none": False, "arg_name": flag_name}
            for flag_name, flag_value in boolean_flags
        )
        argvalidate("create_article", validation_rules, exceptions.ArgumentException)

        # ownership gate: report a failure (rather than raising) when the account
        # is not a legitimate owner of the article
        if limit_to_account and not self.is_legitimate_owner(article, account.id):
            _owned, shared, unowned, unmatched = self.issn_ownership_status(article, account.id)
            return {
                "success": 0,
                "fail": 1,
                "update": 0,
                "new": 0,
                "shared": shared,
                "unowned": unowned,
                "unmatched": unmatched
            }

        # decide whether this save is a fresh article or an update of an existing one
        is_update = 0
        if duplicate_check:
            existing = self.get_duplicate(article, account.id)
            if existing is not None:
                if not merge_duplicate:
                    raise exceptions.DuplicateArticleException()
                # merge will take the old id, so the save below will overwrite
                article.merge(existing)
                is_update = 1

        if add_journal_info:
            article.add_journal_metadata()

        # a dry run goes through every check but never persists
        if not dry_run:
            article.save()

        return {
            "success": 1,
            "fail": 0,
            "update": is_update,
            "new": 1 - is_update,
            "shared": set(),
            "unowned": set(),
            "unmatched": set()
        }
Beispiel #6
0
 def mock(*args, **kwargs):
     """
     Stand-in callable that ignores its arguments and acts on the free variable ``result``.

     ``result`` is not defined here; it must be supplied by the surrounding scope.

     :return: ``result`` unchanged, unless it equals -1
     :raises exceptions.DuplicateArticleException: when ``result == -1``
     """
     # -1 is the sentinel meaning "fail with a duplicate error"
     simulate_failure = result == -1
     if simulate_failure:
         raise exceptions.DuplicateArticleException()
     return result