Ejemplo n.º 1
0
    def test_01_same_fulltext(self):
        """An article sharing a fulltext URL with a saved article is detected as its duplicate"""

        # Fulltext URLs of several shapes to run the same check against
        fulltext_urls = (
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
            "http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf",
            "http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf",
        )

        for url in fulltext_urls:
            # save the article which the candidate is expected to collide with
            existing = models.Article()
            existing_bib = existing.bibjson()
            existing_bib.title = "Example article with a fulltext url"
            existing_bib.add_url(url, urltype="fulltext")
            existing.save(blocking=True)

            # build (without saving) a second article pointing at the same URL
            candidate = models.Article()
            candidate_bib = candidate.bibjson()
            candidate_bib.title = "Replacement article for fulltext url"
            candidate_bib.add_url(url, urltype="fulltext")

            # ask the article service for the duplicate of the candidate
            duplicate = DOAJ.articleService().get_duplicate(candidate)

            assert duplicate is not None
            assert duplicate.bibjson().title == "Example article with a fulltext url"
Ejemplo n.º 2
0
Archivo: query.py Proyecto: DOAJ/doaj
def query(path=None):
    """
    Query endpoint for general queries via the web interface.  Calls on the DOAJ.queryService for action

    The first two segments of the request path are used as the query domain and the index type.
    The query itself is read from the JSON body of a POST, or otherwise from the URL-encoded
    ``source`` request parameter.  If neither is present, None is passed to the query service.

    Aborts with 400 if the path has fewer than two segments or the ``source`` parameter is not valid
    JSON, with 403 if the query service raises AuthoriseException, and with 404 if it raises
    NoSuchObjectException.

    :param path: not read by this function; the path is taken from the request object
    :return: a response whose body is the search result serialised as JSON
    """
    pathparts = request.path.strip('/').split('/')
    if len(pathparts) < 2:
        abort(400)
    domain = pathparts[0]
    index_type = pathparts[1]

    q = None
    # if this is a POST, read the contents out of the body
    if request.method == "POST":
        q = request.json
    # if there is a source param, load the json from it
    elif 'source' in request.values:
        try:
            q = json.loads(urllib2.unquote(request.values['source']))
        except ValueError:
            # a query which is not valid JSON is the client's error, so report it as a bad
            # request instead of letting the exception escape as a server error
            abort(400)

    try:
        account = None
        if current_user is not None and not current_user.is_anonymous:
            account = current_user._get_current_object()
        queryService = DOAJ.queryService()
        res = queryService.search(domain, index_type, q, account, request.values)
    except exceptions.AuthoriseException:
        abort(403)
    except exceptions.NoSuchObjectException:
        abort(404)

    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp
Ejemplo n.º 3
0
    def test_07_both_duplication_criteria(self):
        """Check that an article is only reported once if it is duplicated by both DOI and fulltext URL"""
        # make ourselves an example article
        ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
        doi = "10.doi/123"

        a = models.Article()
        b = a.bibjson()
        b.title = "Example article with a fulltext url and a DOI"
        b.add_url(ftu, urltype="fulltext")
        b.add_identifier('doi', doi)
        a.save(blocking=True)

        # create another article, matching the first on both the fulltext URL and the DOI
        z = models.Article()
        y = z.bibjson()
        y.title = "Replacement article for fulltext url and a DOI"
        y.add_url(ftu, urltype="fulltext")
        y.add_identifier('doi', doi)

        # determine if there's a duplicate
        articleService = DOAJ.articleService()
        d = articleService.get_duplicates(z)

        # report the number of duplicates in the failure message, so it is visible when the check fails
        assert len(d) == 1, len(d)
        assert d[0].bibjson().title == "Example article with a fulltext url and a DOI"
Ejemplo n.º 4
0
    def retrieve(cls, id, account):
        """
        Fetch an article by id and return it as an OutgoingArticleDO.

        An article which is in DOAJ is returned to any caller.  Any other article is only returned
        when an account is supplied and the article service accepts it as a legitimate owner.

        :param id: the id of the article to pull
        :param account: the requesting account; may be None for articles which are in DOAJ
        :return: the OutgoingArticleDO built from the article model
        :raises Api404Error: if there is no such article, or it is not in DOAJ and the account does not own it
        :raises Api401Error: if the article is not in DOAJ and no account was supplied
        """
        article = models.Article.pull(id)

        # no record with this id
        if article is None:
            raise Api404Error()

        # anything which is not publicly visible needs an account which owns the article
        if not article.is_in_doaj():
            # authentication happens in the layer above, so all we need here is for the account to exist
            if account is None:
                raise Api401Error()

            owner_ok = DOAJ.articleService().is_legitimate_owner(article, account.id)
            if not owner_ok:
                # reported as "not found" for this account
                raise Api404Error()

        return OutgoingArticleDO.from_model(article)
Ejemplo n.º 5
0
    def search(cls, index_type, account, q, page, page_size, sort=None):
        """
        Run a discovery search against one of the supported index types and build the response.

        :param index_type: one of 'article', 'journal' or 'application'
        :param account: the account passed through to the query service
        :param q: the query as supplied by the caller; handed to cls._make_query
        :param page: requested page; may be adjusted by cls._make_query
        :param page_size: requested page size; may be adjusted by cls._make_query
        :param sort: optional sort specification, handed to cls._make_query
        :return: whatever cls._make_response builds from the query results
        :raises DiscoveryException: if index_type is not supported, or the query service reports an error
        """
        if index_type not in ('article', 'journal', 'application'):
            raise DiscoveryException("There was an error executing your query for {0}. Unknown type.".format(index_type))

        if index_type == 'article':
            endpoint = 'search_articles'
            klass = models.Article
        elif index_type == 'journal':
            endpoint = 'search_journals'
            klass = models.Journal
        else:
            endpoint = 'search_applications'
            klass = models.Suggestion

        # the final argument is True in the scroll variant of this call - presumably a "scroll" switch; confirm in _make_query
        raw_query, page, page_size = cls._make_query(q, page, page_size, sort, index_type, False)

        # execute the query against the requested index type
        query_service = DOAJ.queryService()
        res = query_service.search('api_query', index_type, raw_query, account, None)

        # check to see if there was a search error
        if res.get("error") is not None:
            # the full error is only logged; the caller gets a reference which can be matched against the log
            magic = uuid.uuid1()
            app.logger.error("Error executing discovery query search for {i}: {x} (ref: {y})".format(i=index_type, x=res.get("error"), y=magic))
            raise DiscoveryException("There was an error executing your query (ref: {y})".format(y=magic))

        obs = [klass(**raw) for raw in esprit.raw.unpack_json_result(res)]
        return cls._make_response(endpoint, res, q, page, page_size, sort, obs)
Ejemplo n.º 6
0
    def test_05_full_doi(self):
        """ Duplicate DOIs are still detected when the DOI is given as a full URI rather than the bare 10. form """
        # a saved article whose DOI is supplied as an https URI
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example A article with a DOI"
        stored_bib.add_identifier('doi', "https://doi.org/10.doi/123")
        stored.save(blocking=True)

        # a saved article with a different DOI, which must not be reported as a duplicate
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a DOI"
        unrelated_bib.add_identifier('doi', "https://doi.org/10.doi/DIFFERENT")
        unrelated.save(blocking=True)

        # the unsaved candidate carries the first article's DOI, this time as an http URI
        candidate = models.Article()
        candidate_bib = candidate.bibjson()
        candidate_bib.title = "Replacement article for DOI"
        candidate_bib.add_identifier('doi', "http://doi.org/10.doi/123")

        # exactly one stored article should be reported
        service = DOAJ.articleService()
        matches = service.get_duplicates(candidate)
        assert len(matches) == 1

        # asking for a single duplicate should hand back the first stored article
        single = service.get_duplicate(candidate)
        assert single is not None
        found_title = single.bibjson().title
        assert found_title == "Example A article with a DOI", found_title
Ejemplo n.º 7
0
    def test_04_with_doi_instead(self):
        """A duplicate is detected through the DOI identifier alone."""
        # a saved article carrying the DOI we will look for
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example A article with a DOI"
        stored_bib.add_identifier('doi', "10.doi/123")
        stored.save(blocking=True)

        # a saved article with a different DOI, which must not be reported as a duplicate
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a DOI"
        unrelated_bib.add_identifier('doi', "10.doi/DIFFERENT")
        unrelated.save(blocking=True)

        # the unsaved candidate re-uses the first article's DOI
        candidate = models.Article()
        candidate_bib = candidate.bibjson()
        candidate_bib.title = "Replacement article for DOI"
        candidate_bib.add_identifier('doi', "10.doi/123")

        # exactly one stored article should be reported
        service = DOAJ.articleService()
        matches = service.get_duplicates(candidate)
        assert len(matches) == 1

        # asking for a single duplicate should hand back the first stored article
        single = service.get_duplicate(candidate)
        assert single is not None
        found_title = single.bibjson().title
        assert found_title == "Example A article with a DOI", found_title
Ejemplo n.º 8
0
    def scroll(cls, index_type, account, q, page_size, sort=None, scan=False):
        """
        Generator which yields each result of a scrolled discovery query.

        As this is a generator, nothing here (including the index_type check) runs until the
        first result is requested.

        :param index_type: one of 'article', 'journal' or 'application'
        :param account: the account passed through to the query service
        :param q: the query as supplied by the caller; handed to cls._make_query
        :param page_size: requested page size; may be adjusted by cls._make_query
        :param sort: optional sort specification, handed to cls._make_query
        :param scan: passed through to the query service's scroll
        :return: yields the results produced by the query service's scroll
        :raises DiscoveryException: if index_type is not supported
        """
        if index_type not in ('article', 'journal', 'application'):
            raise DiscoveryException("There was an error executing your query for {0}. Unknown type.".format(index_type))

        page = 1 # Not used in scroll
        raw_query, page, page_size = cls._make_query(q, page, page_size, sort, index_type, True)

        # execute the query against the requested index type
        query_service = DOAJ.queryService()
        for result in query_service.scroll('api_query', index_type, raw_query, account, page_size, scan=scan):
            yield result
Ejemplo n.º 9
0
    def run(self):
        """
        Generate the journal CSV through the journal service, recording the outcome on the background job

        :return:
        """
        background_job = self.background_job

        served_from, audit_entries = DOAJ.journalService().csv()

        # copy everything the service reported into the job's audit trail
        for entry in audit_entries:
            background_job.add_audit_message(entry)

        summary = u"CSV generated; will be served from {y}".format(y=served_from)
        background_job.add_audit_message(summary)
Ejemplo n.º 10
0
    def test_03_retrieve_multiple_conflict(self):
        """Several stored articles sharing a fulltext URL raise a merge conflict and are all listed as duplicates"""

        shared_url = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

        # first stored article with the shared URL
        first = models.Article()
        first_bib = first.bibjson()
        first_bib.title = "Example A article with a fulltext url"
        first_bib.add_url(shared_url, urltype="fulltext")
        first.save(blocking=True)

        # pause so that the two stored articles get different timestamps
        time.sleep(1.01)

        # second stored article with the shared URL
        second = models.Article()
        second_bib = second.bibjson()
        second_bib.title = "Example B article with a fulltext url"
        second_bib.add_url(shared_url, urltype="fulltext")
        second.save(blocking=True)

        # a stored article with another URL, which must not be reported as a duplicate
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a fulltext url"
        unrelated_bib.add_url("http://this.is/a/different/url", urltype="fulltext")
        unrelated.save(blocking=True)

        # the unsaved candidate which collides with both stored articles
        candidate = models.Article()
        candidate_bib = candidate.bibjson()
        candidate_bib.title = "Replacement article for fulltext url"
        candidate_bib.add_url(shared_url, urltype="fulltext")

        # asking for a single duplicate must fail, as there is more than one
        service = DOAJ.articleService()
        with self.assertRaises(ArticleMergeConflict):
            service.get_duplicate(candidate)

        # the order of the duplicates does not matter, so both sides are sorted before comparing
        expected = [first, second]
        expected.sort()

        found = service.get_duplicates(candidate)
        assert isinstance(found, list), found
        assert found is not None
        found.sort()
        assert expected == found
Ejemplo n.º 11
0
    def delete(cls, id, account, dry_run=False):
        """
        Delete the article with the given id on behalf of the account.

        :param id: the id of the article to delete
        :param account: the requesting account; must not be None
        :param dry_run: if True, carry out all the checks but do not delete anything
        :raises Api401Error: if no account was supplied
        :raises Api404Error: if there is no such article, or the account is not a legitimate owner of it
        """
        # authentication happens in the layer above, so all we need here is for the account to exist
        if account is None:
            raise Api401Error()

        article = models.Article.pull(id)
        if article is None:
            raise Api404Error()

        # only a legitimate owner may delete; for anyone else the article is "not found"
        if not DOAJ.articleService().is_legitimate_owner(article, account.id):
            raise Api404Error()

        # a dry run stops once all the checks have passed
        if dry_run:
            return

        # issue the delete (no record of the delete required)
        article.delete()
Ejemplo n.º 12
0
    def create(cls, data, account):
        """
        Create an article from the incoming data on behalf of the account.

        :param data: the incoming article data, handed to cls.prep_article
        :param account: the requesting account; must not be None
        :return: the article model produced by cls.prep_article
        :raises Api401Error: if no account was supplied
        :raises Api400Error: if the article service raises ArticleMergeConflict
        :raises Api403Error: if the article service reports a failure for this article
        """
        # authentication happens in the layer above, so all we need here is for the account to exist
        if account is None:
            raise Api401Error()

        # turn the incoming data into an article model
        article = cls.prep_article(data)

        service = DOAJ.articleService()
        try:
            outcome = service.create_article(article, account, add_journal_info=True)
        except ArticleMergeConflict as e:
            raise Api400Error(e.message)

        # a recorded failure means we may not create an article for this journal
        failures = outcome.get("fail", 0)
        if failures == 1:
            raise Api403Error()

        return article
Ejemplo n.º 13
0
    def test_02_different_fulltext(self):
        """Articles whose fulltext URLs differ are not reported as duplicates of each other"""
        # the stored article
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example 2 article with a fulltext url"
        stored_bib.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")
        stored.save(blocking=True)

        # an unsaved article pointing at a different fulltext URL
        candidate = models.Article()
        candidate_bib = candidate.bibjson()
        candidate_bib.title = "Replacement article for fulltext url"
        candidate_bib.add_url("http://this.is/a/different/url", urltype="fulltext")

        # the article service should find nothing
        duplicate = DOAJ.articleService().get_duplicate(candidate)

        assert duplicate is None
Ejemplo n.º 14
0
    def update(cls, id, data, account):
        """
        Replace the article with the given id by one built from the incoming data.

        :param id: the id of the article to update
        :param data: the replacement article data, validated by building an IncomingArticleDO from it
        :param account: the requesting account; must not be None
        :return: the saved replacement article model
        :raises Api401Error: if no account was supplied
        :raises Api404Error: if there is no such article, or the account is not a legitimate owner of it
        :raises Api400Error: if the incoming data fails structural validation
        """
        # authentication happens in the layer above, so all we need here is for the account to exist
        if account is None:
            raise Api401Error()

        # look up the article which is to be replaced
        existing = models.Article.pull(id)
        if existing is None:
            raise Api404Error()

        # only a legitimate owner may edit; for anyone else the article is "not found"
        if not DOAJ.articleService().is_legitimate_owner(existing, account.id):
            raise Api404Error()

        # instantiating the data object is what carries out the structural validation
        try:
            incoming = IncomingArticleDO(data)
        except dataobj.DataStructureException as e:
            raise Api400Error(e.message)

        # convert to an Article, handing over the original article so that everything outside
        # the incoming data is brought across from it
        replacement = incoming.to_article_model(existing)

        # carry over the properties of the existing article which are not allowed to change
        replacement.set_id(id)
        replacement.set_created(existing.created_date)
        existing_subjects = existing.bibjson().subjects()
        replacement.bibjson().set_subjects(existing_subjects)
        replacement = cls.__handle_journal_info(replacement)

        # finally save the replacement, and return it to the caller
        replacement.save()
        return replacement
Ejemplo n.º 15
0
    def run(self):
        """
        Produce the article duplicate reports for this background job.

        Every article is read back from an interim CSV dump and handed to the article service to
        discover its duplicates.  Two CSV reports are written to the output directory: one holding
        the rows for each distinct set of duplicated articles, and one listing the articles for
        which the article service raised DuplicateArticleException (no ids usable for deduplication).
        Afterwards the temporary directory is removed and, if the "email" job parameter is set,
        the output directory is emailed as an archive.

        Job parameters read: "outdir", "tmpdir", "article_csv" and "email".

        :return:
        """
        job = self.background_job
        params = job.params

        # Set up the files we need to run this task - a dir to place the report, and a place to write the article csv
        outdir = self.get_param(params, "outdir", "article_duplicates_" + dates.today())
        job.add_audit_message("Saving reports to " + outdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        # Location for our interim CSV file of articles
        tmpdir = self.get_param(params, "tmpdir", 'tmp_article_duplicate_report')
        if not os.path.exists(tmpdir):
            os.makedirs(tmpdir)

        tmp_csvname = self.get_param(params, "article_csv", False)
        tmp_csvpath, total = self._make_csv_dump(tmpdir, tmp_csvname)

        # Work out where our reports go
        global_reportfile = 'duplicate_articles_global_' + dates.today() + '.csv'
        global_reportpath = os.path.join(outdir, global_reportfile)

        noids_reportfile = 'noids_' + dates.today() + '.csv'
        noids_reportpath = os.path.join(outdir, noids_reportfile)

        # Record the sets of duplicated articles.  Each entry is a frozenset of article ids, so that
        # checking whether a set has already been reported does not need a scan over all earlier sets
        global_matches = set()

        a_count = 0

        articleService = DOAJ.articleService()

        # The report files are opened as context managers so that they are closed even if processing fails part way
        with codecs.open(global_reportpath, "wb", "utf-8") as f, codecs.open(noids_reportpath, "wb", "utf-8") as g:
            global_report = UnicodeWriter(f)
            header = ["article_id", "article_created", "article_doi", "article_fulltext", "article_owner", "article_issns", "article_in_doaj", "n_matches", "match_type", "match_id", "match_created", "match_doi", "match_fulltext", "match_owner", "match_issns", "match_in_doaj", "owners_match", "titles_match", "article_title", "match_title"]
            global_report.writerow(header)

            noids_report = UnicodeWriter(g)
            header = ["article_id", "article_created", "article_owner", "article_issns", "article_in_doaj"]
            noids_report.writerow(header)

            # Read back in the article csv file we created earlier
            with codecs.open(tmp_csvpath, 'rb', 'utf-8') as t:
                article_reader = UnicodeReader(t)

                start = datetime.now()
                estimated_finish = ""
                for a in article_reader:
                    # every 100 articles, extrapolate the finish time from the average time per article so far
                    if a_count > 1 and a_count % 100 == 0:
                        n = datetime.now()
                        diff = (n - start).total_seconds()
                        expected_total = ((diff / a_count) * total)
                        estimated_finish = dates.format(dates.after(start, expected_total))
                    a_count += 1

                    # rebuild just enough of the article from the CSV row for duplicate detection and reporting
                    article = models.Article(_source={'id': a[0], 'created_date': a[1], 'bibjson': {'identifier': json.loads(a[2]), 'link': json.loads(a[3]), 'title': a[4]}, 'admin': {'in_doaj': json.loads(a[5])}})

                    # Get the global duplicates
                    try:
                        global_duplicates = articleService.discover_duplicates(article, owner=None, results_per_match_type=10000)
                    except exceptions.DuplicateArticleException:
                        # this means the article did not have any ids that could be used for deduplication
                        owner = self._lookup_owner(article)
                        noids_report.writerow([article.id, article.created_date, owner, ','.join(article.bibjson().issns()), article.is_in_doaj()])
                        continue

                    dupcount = 0
                    if global_duplicates:

                        # Look up an article's owner
                        owner = self._lookup_owner(article)

                        # Deduplicate the DOI and fulltext duplicate lists
                        s = frozenset([article.id] + [d.id for d in global_duplicates.get('doi', []) + global_duplicates.get('fulltext', [])])
                        dupcount = len(s) - 1

                        # only write out a set of duplicates the first time we come across it
                        if s not in global_matches:
                            self._write_rows_from_duplicates(article, owner, global_duplicates, global_report)
                            global_matches.add(s)

                    app.logger.debug('{0}/{1} {2} {3} {4} {5}'.format(a_count, total, article.id, dupcount, len(global_matches), estimated_finish))

        job.add_audit_message('{0} articles processed for duplicates. {1} global duplicate sets found.'.format(a_count, len(global_matches)))

        # Delete the transient temporary files.
        shutil.rmtree(tmpdir)

        # Email the reports if that parameter has been set.
        send_email = self.get_param(params, "email", False)
        if send_email:
            archive_name = "article_duplicates_" + dates.today()
            email_archive(outdir, archive_name)
            job.add_audit_message("email alert sent")
        else:
            job.add_audit_message("no email alert sent")
Ejemplo n.º 16
0
    def reject_application(self, application, account, provenance=True, note=None, manual_update=True):
        """
        Reject an application.  This will:
        * set the application status to "rejected" (if not already)
        * add the note to the application (if one is given)
        * remove the current_journal field, and move it to related_journal (if needed)
        * remove the current_application field from the related journal (if needed)
        * save the application
        * write a provenance record for the rejection (if requested)

        :param application: the models.Suggestion to reject
        :param account: the models.Account carrying out the rejection; must have the "reject_application" role
        :param provenance: whether to write a provenance record for the rejection
        :param note: optional note to add to the application
        :param manual_update: whether to record this as a manual update on the application
        :return:
        :raises exceptions.AuthoriseException: if the account does not have the "reject_application" role
        :raises exceptions.SaveException: if saving the current journal or the application returns None
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        # (exceptions.ArgumentException is handed to argvalidate - presumably raised on failure)
        argvalidate("reject_application", [
            {"arg": application, "instance" : models.Suggestion, "allow_none" : False, "arg_name" : "application"},
            {"arg" : account, "instance" : models.Account, "allow_none" : False, "arg_name" : "account"},
            {"arg" : provenance, "instance" : bool, "allow_none" : False, "arg_name" : "provenance"},
            {"arg" : note, "instance" : basestring, "allow_none" : True, "arg_name" : "note"},
            {"arg" : manual_update, "instance" : bool, "allow_none" : False, "arg_name" : "manual_update"}
        ], exceptions.ArgumentException)

        # no isEnabledFor guard is needed: Logger.debug checks the level itself, and isEnabledFor
        # takes a numeric level, not the string "debug"
        app.logger.debug("Entering reject_application")

        journalService = DOAJ.journalService()

        # check we're allowed to carry out this action
        if not account.has_role("reject_application"):
            raise exceptions.AuthoriseException(message="This user is not allowed to reject applications", reason=exceptions.AuthoriseException.WRONG_ROLE)

        # ensure the application status is "rejected"
        if application.application_status != constants.APPLICATION_STATUS_REJECTED:
            application.set_application_status(constants.APPLICATION_STATUS_REJECTED)

        # add the note to the application
        if note is not None:
            application.add_note(note)

        # retrieve the id of the current journal if there is one
        cj_id = application.current_journal
        cj = None

        # if there is a current_journal record, remove it, and record
        # it as a related journal.  This will let us come back later and know
        # which journal record this was intended as an update against if needed.
        if cj_id is not None:
            cj, _ = journalService.journal(cj_id)
            application.remove_current_journal()
            if cj is not None:
                application.set_related_journal(cj_id)
                cj.remove_current_application()

        # if there is a current journal, we will have modified it above, so save it
        if cj is not None:
            saved = cj.save()
            if saved is None:
                raise exceptions.SaveException("Save on current_journal in reject_application failed")

        # if we were asked to record this as a manual update, record that on the application
        if manual_update:
            application.set_last_manual_update()

        saved = application.save()
        if saved is None:
            raise exceptions.SaveException("Save on application in reject_application failed")

        # record a provenance record that this action took place
        if provenance:
            models.Provenance.make(account, constants.PROVENANCE_STATUS_REJECTED, application)

        app.logger.debug("Completed reject_application")
Ejemplo n.º 17
0
    def delete_application(self, application_id, account):
        """
        Function to delete an application, and all references to it in other objects (current and related journals)

        The application and all related journals need to be locked before this process can proceed, so you may get a
        lock.Locked exception.  Every lock taken here is released again before this method returns or raises.

        :param application_id: the id of the application to delete
        :param account: the models.Account carrying out the delete
        :return:
        :raises exceptions.NoSuchObjectException: if there is no application with this id
        :raises exceptions.SaveException: if saving the current or related journal returns None
        :raises lock.Locked: if the application or one of its journals cannot be locked
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        # (exceptions.ArgumentException is handed to argvalidate - presumably raised on failure)
        argvalidate("delete_application", [
            {"arg": application_id, "instance" : unicode, "allow_none" : False, "arg_name" : "application_id"},
            {"arg" : account, "instance" : models.Account, "allow_none" : False, "arg_name" : "account"}
        ], exceptions.ArgumentException)

        journalService = DOAJ.journalService()
        authService = DOAJ.authorisationService()

        # get hold of a copy of the application.  If there isn't one, there is nothing we can delete
        application, _ = self.application(application_id)
        if application is None:
            raise exceptions.NoSuchObjectException

        # determine if the user can edit the application
        # (the return value is not used - presumably this raises if the account is not permitted)
        authService.can_edit_application(account, application)

        # attempt to lock the record (this may raise a Locked exception)
        alock = lock.lock(constants.LOCK_APPLICATION, application_id, account.id)

        # from here on, whatever goes wrong (a Locked journal or anything else), every lock
        # obtained so far is released by the finally clause before the exception propagates
        cjlock = None
        rjlock = None
        try:
            # obtain the current journal, with associated lock
            current_journal = None
            if application.current_journal is not None:
                current_journal, cjlock = journalService.journal(application.current_journal, lock_journal=True, lock_account=account)

            # obtain the related journal, with associated lock
            related_journal = None
            if application.related_journal is not None:
                related_journal, rjlock = journalService.journal(application.related_journal, lock_journal=True, lock_account=account)

            # detach the application from its current journal
            if current_journal is not None:
                current_journal.remove_current_application()
                saved = current_journal.save()
                if saved is None:
                    raise exceptions.SaveException("Unable to save journal record")

            # mark the relation on the related journal as "deleted", keeping any acceptance date it had
            if related_journal is not None:
                relation_record = related_journal.related_application_record(application_id)
                if relation_record is None:
                    relation_record = {}
                related_journal.add_related_application(application_id, relation_record.get("date_accepted"), "deleted")
                saved = related_journal.save()
                if saved is None:
                    raise exceptions.SaveException("Unable to save journal record")

            application.delete()

        finally:
            if alock is not None: alock.delete()
            if cjlock is not None: cjlock.delete()
            if rjlock is not None: rjlock.delete()
Ejemplo n.º 18
0
    def update_request_for_journal(self, journal_id, account=None, lock_timeout=None):
        """
        Obtain an update request application object for the journal with the given journal_id

        An update request may either be loaded from the database, if it already exists, or created
        in-memory if it has not previously been created.

        If an account is provided, this will validate that the account holder is allowed to make
        the conversion from journal to application, if a conversion is required.

        When this request runs, the journal will be locked to the provided account if an account is
        given.  If the application is loaded from the database, this will also be locked for the account
        holder.

        If there is no such journal, or the journal is not in DOAJ, (None, None, None) is returned.

        :param journal_id: the id of the journal to obtain the update request for
        :param account: optional models.Account to check permissions against and to hold the locks
        :param lock_timeout: validated as an int or None, but not otherwise used in this method
        :return: a tuple of (Application Object, Journal Lock, Application Lock)
        :raises lock.Locked: if the journal or the existing application cannot be locked
        :raises exceptions.AuthoriseException: if the account may not edit the existing update request
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        # (exceptions.ArgumentException is handed to argvalidate - presumably raised on failure)
        argvalidate("update_request_for_journal", [
            {"arg": journal_id, "instance" : basestring, "allow_none" : False, "arg_name" : "journal_id"},
            {"arg" : account, "instance" : models.Account, "allow_none" : True, "arg_name" : "account"},
            {"arg" : lock_timeout, "instance" : int, "allow_none" : True, "arg_name" : "lock_timeout"}
        ], exceptions.ArgumentException)

        # no isEnabledFor guard is needed: Logger.debug checks the level itself, and isEnabledFor
        # takes a numeric level, not the string "debug"
        app.logger.debug("Entering update_request_for_journal")

        journalService = DOAJ.journalService()
        authService = DOAJ.authorisationService()

        # first retrieve the journal, and return empty if there isn't one.
        # We don't attempt to obtain a lock at this stage, as we want to check that the user is authorised first
        journal_lock = None
        journal, _ = journalService.journal(journal_id)
        if journal is None:
            app.logger.info("Request for journal {x} did not find anything in the database".format(x=journal_id))
            return None, None, None

        # if the journal is not in_doaj, we won't create an update request for it
        if not journal.is_in_doaj():
            app.logger.info("Request for journal {x} found it is not in_doaj; will not create update request".format(x=journal_id))
            return None, None, None

        # retrieve the latest application attached to this journal
        application_lock = None
        application = models.Suggestion.find_latest_by_current_journal(journal_id)

        # if no such application exists, create one in memory (this will check that the user is permitted to create one)
        # at the same time, create the lock for the journal.  This will throw an AuthorisedException or a Locked exception
        # (in that order of preference) if any problems arise.
        if application is None:
            app.logger.info("No existing update request for journal {x}; creating one".format(x=journal.id))
            application = journalService.journal_2_application(journal, account=account)

            # if the journal has a latest related application, patch the new application with it
            lra_id = journal.latest_related_application_id()
            if lra_id is not None:
                lra, _ = self.application(lra_id)
                if lra is not None:
                    self.patch_application(application, lra)
            if account is not None:
                journal_lock = lock.lock("journal", journal_id, account.id)

        # otherwise check that the user (if given) has the rights to edit the application
        # then lock the application and journal to the account.
        # If a lock cannot be obtained, unlock the journal and application before we return
        elif account is not None:
            try:
                authService.can_edit_application(account, application)
                application_lock = lock.lock("suggestion", application.id, account.id)
                journal_lock = lock.lock("journal", journal_id, account.id)
            except lock.Locked as e:
                if application_lock is not None: application_lock.delete()
                if journal_lock is not None: journal_lock.delete()
                raise
            except exceptions.AuthoriseException as e:
                # replace the message with one naming the account and journal, then re-raise the same exception
                msg = "Account {x} is not permitted to edit the current update request on journal {y}".format(x=account.id, y=journal.id)
                app.logger.info(msg)
                e.message = msg
                raise

            app.logger.info("Using existing application {y} as update request for journal {x}".format(y=application.id, x=journal.id))

        app.logger.debug("Completed update_request_for_journal; return application object")

        return application, journal_lock, application_lock
Ejemplo n.º 19
0
    def journal_2_application(self, journal, account=None, keep_editors=False):
        """
        Convert a given journal into an application object.

        The journal is converted in-memory to an application object (currently
        a Suggestion) whose status is set to
        constants.APPLICATION_STATUS_UPDATE_REQUEST.  The new application
        WILL NOT be saved by this method.

        If an account is provided, this will validate that the account holder is
        allowed to make this conversion.

        :param journal: a journal to convert
        :param account: an account doing the action - optional, if specified the application will only be created if the account is allowed to
        :param keep_editors: if True, copy the journal's editor and editor group (where set) on to the application
        :return: Suggestion object
        :raises exceptions.AuthoriseException: re-raised (with an updated message) when the
            authorisation service refuses the account
        """

        # first validate the incoming arguments to ensure that we've got the right thing;
        # argvalidate is handed exceptions.ArgumentException to use for validation failures
        argvalidate("journal_2_application", [{
            "arg": journal,
            "instance": models.Journal,
            "allow_none": False,
            "arg_name": "journal"
        }, {
            "arg": account,
            "instance": models.Account,
            "arg_name": "account"
        }], exceptions.ArgumentException)

        # Logger.debug() performs its own level check.  The previous
        # isEnabledFor("debug") guard passed a string where a numeric level is
        # required, which raises TypeError on Python 3.
        app.logger.debug("Entering journal_2_application")

        authService = DOAJ.authorisationService()

        # if an account is specified, check that it is allowed to perform this action
        if account is not None:
            try:
                authService.can_create_update_request(
                    account, journal)  # throws exception if not allowed
            except exceptions.AuthoriseException as e:
                msg = "Account {x} is not permitted to create an update request on journal {y}".format(
                    x=account.id, y=journal.id)
                app.logger.info(msg)
                e.message = msg
                raise

        # copy all the relevant information from the journal to the application
        bj = journal.bibjson()
        contacts = journal.contacts()
        notes = journal.notes
        first_contact = None

        application = models.Suggestion()
        application.set_application_status(
            constants.APPLICATION_STATUS_UPDATE_REQUEST)
        for c in contacts:
            application.add_contact(c.get("name"), c.get("email"))
            # remember the first contact, it is used as the suggester below
            if first_contact is None:
                first_contact = c
        application.set_current_journal(journal.id)
        if keep_editors is True:
            if journal.editor is not None:
                application.set_editor(journal.editor)
            if journal.editor_group is not None:
                application.set_editor_group(journal.editor_group)
        for n in notes:
            application.add_note(n.get("note"), n.get("date"))
        application.set_owner(journal.owner)
        application.set_seal(journal.has_seal())
        application.set_bibjson(bj)
        if first_contact is not None:
            application.set_suggester(first_contact.get("name"),
                                      first_contact.get("email"))
        application.suggested_on = dates.now()

        app.logger.debug(
            "Completed journal_2_application; return application object")
        return application
Ejemplo n.º 20
0
Archivo: journal.py Proyecto: DOAJ/doaj
    def journal_2_application(self, journal, account=None, keep_editors=False):
        """
        Function to convert a given journal into an application object.

        Provide the journal, and it will be converted
        in-memory to the application object (currently a Suggestion).  The new application
        WILL NOT be saved by this method.

        If an account is provided, this will validate that the account holder is
        allowed to make this conversion

        :param journal: a journal to convert
        :param account: an account doing the action - optional, if specified the application will only be created if the account is allowed to
        :param keep_editors: when True, the journal's editor and editor_group (if not None) are carried over to the application
        :return: Suggestion object
        :raises exceptions.AuthoriseException: re-raised, with its message replaced, if the account is not allowed
        """

        # first validate the incoming arguments to ensure that we've got the right thing
        argvalidate("journal_2_application", [
            {"arg": journal, "instance" : models.Journal, "allow_none" : False, "arg_name" : "journal"},
            {"arg" : account, "instance" : models.Account, "arg_name" : "account"}
        ], exceptions.ArgumentException)

        # No isEnabledFor() guard: the old guard passed the string "debug" instead of a
        # numeric level (a TypeError on Python 3), and debug() checks the level itself.
        app.logger.debug("Entering journal_2_application")

        authService = DOAJ.authorisationService()

        # if an account is specified, check that it is allowed to perform this action
        if account is not None:
            try:
                authService.can_create_update_request(account, journal)    # throws exception if not allowed
            except exceptions.AuthoriseException as e:
                msg = "Account {x} is not permitted to create an update request on journal {y}".format(x=account.id, y=journal.id)
                app.logger.info(msg)
                e.message = msg
                raise

        # copy all the relevant information from the journal to the application
        bj = journal.bibjson()
        contacts = journal.contacts()
        notes = journal.notes
        first_contact = None

        application = models.Suggestion()
        application.set_application_status(constants.APPLICATION_STATUS_UPDATE_REQUEST)
        for c in contacts:
            application.add_contact(c.get("name"), c.get("email"))
            # the first contact encountered becomes the suggester further down
            if first_contact is None:
                first_contact = c
        application.set_current_journal(journal.id)
        if keep_editors is True:
            if journal.editor is not None:
                application.set_editor(journal.editor)
            if journal.editor_group is not None:
                application.set_editor_group(journal.editor_group)
        for n in notes:
            application.add_note(n.get("note"), n.get("date"))
        application.set_owner(journal.owner)
        application.set_seal(journal.has_seal())
        application.set_bibjson(bj)
        if first_contact is not None:
            application.set_suggester(first_contact.get("name"), first_contact.get("email"))
        application.suggested_on = dates.now()

        app.logger.debug("Completed journal_2_application; return application object")
        return application
Ejemplo n.º 21
0
    def reject_application(self,
                           application,
                           account,
                           provenance=True,
                           note=None,
                           manual_update=True):
        """
        Reject an application.  This will:
        * set the application status to "rejected" (if not already)
        * add the supplied note to the application (if one is given)
        * remove the current_journal field, and move it to related_journal (if needed)
        * remove the current_application field from the related journal (if needed)
        * save the related journal (if it was modified) and then the application
        * write a provenance record for the rejection (if requested)

        :param application: the Suggestion to reject
        :param account: the Account carrying out the rejection; must have the "reject_application" role
        :param provenance: whether to write a provenance record for the rejection
        :param note: optional text to add to the application as a note
        :param manual_update: whether to record this as a manual update on the application
        :return: None
        :raises exceptions.AuthoriseException: if the account lacks the "reject_application" role
        :raises exceptions.SaveException: if saving the journal or the application returns None
        """
        # first validate the incoming arguments to ensure that we've got the right thing
        argvalidate("reject_application", [{
            "arg": application,
            "instance": models.Suggestion,
            "allow_none": False,
            "arg_name": "application"
        }, {
            "arg": account,
            "instance": models.Account,
            "allow_none": False,
            "arg_name": "account"
        }, {
            "arg": provenance,
            "instance": bool,
            "allow_none": False,
            "arg_name": "provenance"
        }, {
            "arg": note,
            "instance": basestring,
            "allow_none": True,
            "arg_name": "note"
        }, {
            "arg": manual_update,
            "instance": bool,
            "allow_none": False,
            "arg_name": "manual_update"
        }], exceptions.ArgumentException)

        # Logger.debug() checks the level itself.  The previous
        # isEnabledFor("debug") guard passed a string instead of a numeric
        # level, which raises TypeError on Python 3.
        app.logger.debug("Entering reject_application")

        journalService = DOAJ.journalService()

        # check we're allowed to carry out this action
        if not account.has_role("reject_application"):
            raise exceptions.AuthoriseException(
                message="This user is not allowed to reject applications",
                reason=exceptions.AuthoriseException.WRONG_ROLE)

        # ensure the application status is "rejected"
        if application.application_status != constants.APPLICATION_STATUS_REJECTED:
            application.set_application_status(
                constants.APPLICATION_STATUS_REJECTED)

        # add the note to the application
        if note is not None:
            application.add_note(note)

        # if there is a current_journal record, remove it, and record
        # it as a related journal.  This will let us come back later and know
        # which journal record this was intended as an update against if needed.
        cj_id = application.current_journal
        if cj_id is not None:
            cj, _ = journalService.journal(cj_id)
            application.remove_current_journal()
            if cj is not None:
                application.set_related_journal(cj_id)
                cj.remove_current_application()

                # the journal has been modified, so it must be saved
                if cj.save() is None:
                    raise exceptions.SaveException(
                        "Save on current_journal in reject_application failed")

        # if we were asked to record this as a manual update, record that on the application
        if manual_update:
            application.set_last_manual_update()

        if application.save() is None:
            raise exceptions.SaveException(
                "Save on application in reject_application failed")

        # record a provenance record that this action took place
        if provenance:
            models.Provenance.make(account,
                                   constants.PROVENANCE_STATUS_REJECTED,
                                   application)

        app.logger.debug("Completed reject_application")
Ejemplo n.º 22
0
def application_quick_reject(application_id):
    """
    Reject a new application using the reason(s) supplied in the request.

    Reads "reject_reason" and "additional_reject_information" from the request
    values, rejects the application through the application service, attempts
    to email the owner and suggester, flashes the outcome, and redirects
    back to the application's edit page.

    Aborts with:
    * 400 if no reason was supplied, or if the application is an update request
      (i.e. it has a current_journal)
    * 404 if the application cannot be found
    * 409 if an edit lock on the application cannot be obtained

    :param application_id: id of the application to reject
    :return: a redirect to the suggestion page for the application
    """

    # extract the note information from the request
    canned_reason = request.values.get("reject_reason", "")
    additional_info = request.values.get("additional_reject_information", "")
    reasons = [r for r in (canned_reason, additional_info) if r != ""]
    if not reasons:
        abort(400)
    reason = " - ".join(reasons)
    note = Messages.REJECT_NOTE_WRAPPER.format(editor=current_user.id,
                                               note=reason)

    applicationService = DOAJ.applicationService()

    # retrieve the application and an edit lock on that application
    application = None
    try:
        application, alock = applicationService.application(
            application_id,
            lock_application=True,
            lock_account=current_user._get_current_object())
    except lock.Locked:
        abort(409)

    # the service returns None for an unknown id; fail cleanly rather than
    # hitting an AttributeError on the next line
    if application is None:
        abort(404)

    # determine if this was a new application or an update request;
    # quick reject is only available for new applications
    update_request = application.current_journal is not None
    if update_request:
        abort(400)

    # reject the application
    applicationService.reject_application(application,
                                          current_user._get_current_object(),
                                          note=note)

    # send the notification email to the user; a failure here is deliberately
    # non-fatal (the rejection has already been saved) but is reported below
    sent = False
    send_report = []
    try:
        send_report = emails.send_publisher_reject_email(
            application,
            note=reason,
            update_request=update_request,
            send_to_owner=True,
            send_to_suggester=True)
        sent = True
    except app_email.EmailException:
        pass

    # sort out some flash messages for the user
    flash(note, "success")

    if not sent:
        # send_report is empty when sending failed, so this message must be
        # flashed outside the per-recipient loop or it would never be shown
        flash(
            Messages.NOT_SENT_REJECTED_APPLICATION_EMAILS.format(
                user=application.owner), "error")

    for instructions in send_report:
        msg = ""
        if instructions["type"] == "owner":
            msg = Messages.SENT_REJECTED_APPLICATION_EMAIL_TO_OWNER.format(
                user=application.owner,
                email=instructions["email"],
                name=instructions["name"])
        elif instructions["type"] == "suggester":
            msg = Messages.SENT_REJECTED_APPLICATION_EMAIL_TO_SUGGESTER.format(
                email=instructions["email"], name=instructions["name"])
        flash(msg, "success")

    # redirect the user back to the edit page
    return redirect(url_for('.suggestion_page', suggestion_id=application_id))
Ejemplo n.º 23
0
    def run(self):
        """
        Produce the article duplicate reports for this background job.

        Steps:
        * dump the articles to an interim CSV (via self._make_csv_dump)
        * for each article in that CSV, ask the article service for its
          duplicates and write one report entry per distinct set of duplicates
          to the global report
        * articles for which duplicate discovery raises
          DuplicateArticleException (no ids usable for deduplication) are
          written to the separate "noids" report
        * remove the temporary directory and, if the "email" job parameter is
          set, email an archive of the output directory

        Job parameters read: "outdir", "tmpdir", "article_csv", "email".

        :return: None
        """
        job = self.background_job
        params = job.params

        # Set up the files we need to run this task - a dir to place the report, and a place to write the article csv
        outdir = self.get_param(params, "outdir",
                                "article_duplicates_" + dates.today())
        job.add_audit_message("Saving reports to " + outdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        # Location for our interim CSV file of articles
        tmpdir = self.get_param(params, "tmpdir",
                                'tmp_article_duplicate_report')
        if not os.path.exists(tmpdir):
            os.makedirs(tmpdir)

        tmp_csvname = self.get_param(params, "article_csv", False)
        tmp_csvpath, total = self._make_csv_dump(tmpdir, tmp_csvname)

        global_reportfile = 'duplicate_articles_global_' + dates.today() + '.csv'
        global_reportpath = os.path.join(outdir, global_reportfile)
        noids_reportfile = 'noids_' + dates.today() + '.csv'
        noids_reportpath = os.path.join(outdir, noids_reportfile)

        # Record the sets of duplicated articles.  A set of frozensets gives
        # constant-time "have we reported this group already?" checks.
        global_matches = set()

        a_count = 0

        articleService = DOAJ.articleService()

        # The context manager guarantees all three files are closed even if
        # processing fails part-way.  newline="" is what the csv module
        # requires of the file objects it reads from and writes to.
        with open(global_reportpath, "w", encoding="utf-8", newline="") as f, \
                open(noids_reportpath, "w", encoding="utf-8", newline="") as g, \
                open(tmp_csvpath, 'r', encoding='utf-8', newline="") as t:

            # Initialise our reports
            global_report = csv.writer(f)
            global_report.writerow([
                "article_id", "article_created", "article_doi", "article_fulltext",
                "article_owner", "article_issns", "article_in_doaj", "n_matches",
                "match_type", "match_id", "match_created", "match_doi",
                "match_fulltext", "match_owner", "match_issns", "match_in_doaj",
                "owners_match", "titles_match", "article_title", "match_title"
            ])

            noids_report = csv.writer(g)
            noids_report.writerow([
                "article_id", "article_created", "article_owner", "article_issns",
                "article_in_doaj"
            ])

            # Read back in the article csv file we created earlier
            article_reader = csv.reader(t)

            start = datetime.now()
            estimated_finish = ""
            for a in article_reader:
                # every 100 articles, re-estimate the finish time from the
                # average time per article so far
                if a_count > 1 and a_count % 100 == 0:
                    n = datetime.now()
                    diff = (n - start).total_seconds()
                    expected_total = ((diff / a_count) * total)
                    estimated_finish = dates.format(
                        dates.after(start, expected_total))
                a_count += 1

                # rebuild a minimal article from the CSV row
                article = models.Article(
                    _source={
                        'id': a[0],
                        'created_date': a[1],
                        'bibjson': {
                            'identifier': json.loads(a[2]),
                            'link': json.loads(a[3]),
                            'title': a[4]
                        },
                        'admin': {
                            'in_doaj': json.loads(a[5])
                        }
                    })

                # Get the global duplicates
                try:
                    global_duplicates = articleService.discover_duplicates(
                        article,
                        results_per_match_type=10000,
                        include_article=False)
                except exceptions.DuplicateArticleException:
                    # this means the article did not have any ids that could be used for deduplication
                    owner = self._lookup_owner(article)
                    noids_report.writerow([
                        article.id, article.created_date, owner,
                        ','.join(article.bibjson().issns()),
                        article.is_in_doaj()
                    ])
                    continue

                dupcount = 0
                if global_duplicates:

                    # Look up an article's owner
                    owner = self._lookup_owner(article)

                    # Deduplicate the DOI and fulltext duplicate lists
                    s = frozenset([article.id] + [
                        d.id for d in global_duplicates.get('doi', []) +
                        global_duplicates.get('fulltext', [])
                    ])
                    # remove article's own id from global_duplicates
                    dupcount = len(s) - 1
                    # only report each distinct group of duplicates once
                    if s not in global_matches:
                        self._write_rows_from_duplicates(
                            article, owner, global_duplicates, global_report)
                        global_matches.add(s)

                app.logger.debug('{0}/{1} {2} {3} {4} {5}'.format(
                    a_count, total, article.id, dupcount, len(global_matches),
                    estimated_finish))

        job.add_audit_message(
            '{0} articles processed for duplicates. {1} global duplicate sets found.'
            .format(a_count, len(global_matches)))

        # Delete the transient temporary files.
        shutil.rmtree(tmpdir)

        # Email the reports if that parameter has been set.
        send_email = self.get_param(params, "email", False)
        if send_email:
            archive_name = "article_duplicates_" + dates.today()
            email_archive(outdir, archive_name)
            job.add_audit_message("email alert sent")
        else:
            job.add_audit_message("no email alert sent")