def test_01_same_fulltext(self):
    """Check duplication detection on articles with the same fulltext URL"""

    # Sample fulltext URLs to match on (one of them contains percent-encoded characters)
    fulltext_urls = [
        "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
        "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
        "http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf",
        "http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf"
    ]

    for url in fulltext_urls:
        # store an article carrying this fulltext URL
        stored = models.Article()
        stored_bj = stored.bibjson()
        stored_bj.title = "Example article with a fulltext url"
        stored_bj.add_url(url, urltype="fulltext")
        stored.save(blocking=True)

        # build (but do not save) a second article with the very same URL
        incoming = models.Article()
        incoming_bj = incoming.bibjson()
        incoming_bj.title = "Replacement article for fulltext url"
        incoming_bj.add_url(url, urltype="fulltext")

        # the stored article must be reported as the duplicate of the incoming one
        svc = DOAJ.articleService()
        match = svc.get_duplicate(incoming)
        assert match is not None
        assert match.bibjson().title == "Example article with a fulltext url"
def query(path=None):
    """
    Query endpoint for general queries via the web interface.

    Calls on the DOAJ.queryService for action.  The domain and the index type are taken
    from the first two segments of the request path.  The query is read from the JSON
    request body on POST, otherwise from a url-encoded ``source`` request value if present.

    Responds with 400 if the path has fewer than two segments or ``source`` is not valid
    JSON, 403 if the query service raises AuthoriseException, and 404 if it raises
    NoSuchObjectException.

    :param path: not read by this function; the path is taken from the request itself
    :return: a response with mimetype application/json containing the serialised result
    """
    pathparts = request.path.strip('/').split('/')
    if len(pathparts) < 2:
        abort(400)
    domain = pathparts[0]
    index_type = pathparts[1]

    q = None
    # if this is a POST, read the contents out of the body
    if request.method == "POST":
        q = request.json
    # if there is a source param, load the json from it
    elif 'source' in request.values:
        try:
            q = json.loads(urllib2.unquote(request.values['source']))
        except ValueError:
            # the client sent something that is not JSON: that is a bad request,
            # not a server fault
            abort(400)

    try:
        account = None
        if current_user is not None and not current_user.is_anonymous:
            account = current_user._get_current_object()
        queryService = DOAJ.queryService()
        res = queryService.search(domain, index_type, q, account, request.values)
    except exceptions.AuthoriseException:
        abort(403)
    except exceptions.NoSuchObjectException:
        abort(404)

    resp = make_response(json.dumps(res))
    resp.mimetype = "application/json"
    return resp
def test_07_both_duplication_criteria(self):
    """Check that an article is only reported once if it is duplicated by both DOI and fulltext URL"""

    # make ourselves an example article with both a fulltext URL and a DOI
    ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
    doi = "10.doi/123"

    a = models.Article()
    b = a.bibjson()
    b.title = "Example article with a fulltext url and a DOI"
    b.add_url(ftu, urltype="fulltext")
    b.add_identifier('doi', doi)
    a.save(blocking=True)

    # create another (unsaved) article sharing both identifiers
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url and a DOI"
    y.add_url(ftu, urltype="fulltext")
    y.add_identifier('doi', doi)

    # the saved article matches on two criteria but must appear only once in the result
    articleService = DOAJ.articleService()
    d = articleService.get_duplicates(z)
    assert len(d) == 1
    assert d[0].bibjson().title == "Example article with a fulltext url and a DOI"
def retrieve(cls, id, account):
    """
    Fetch an article by id and return it as an outgoing data object.

    Articles that are in DOAJ are returned to anyone.  Any other article is only returned
    when an account is supplied and the article service confirms it is a legitimate owner.

    :param id: the id of the article to retrieve
    :param account: the requesting account, or None if there is none
    :return: OutgoingArticleDO built from the article
    :raises Api404Error: if no such article exists, or the account is not a legitimate owner
    :raises Api401Error: if the article is not in DOAJ and no account was supplied
    """
    article = models.Article.pull(id)
    if article is None:
        raise Api404Error()

    # only non-public articles need the account and ownership checks
    if not article.is_in_doaj():
        if account is None:
            raise Api401Error()

        ownership = DOAJ.articleService()
        if not ownership.is_legitimate_owner(article, account.id):
            # reported as "not found" for this account
            raise Api404Error()

    return OutgoingArticleDO.from_model(article)
def search(cls, index_type, account, q, page, page_size, sort=None):
    """
    Run a paged discovery query against one of the article, journal or application types.

    :param index_type: one of 'article', 'journal' or 'application'
    :param account: the account passed on to the query service
    :param q: the query, passed to _make_query and _make_response
    :param page: requested page, as accepted by _make_query
    :param page_size: requested page size, as accepted by _make_query
    :param sort: optional sort, as accepted by _make_query
    :return: whatever _make_response builds for the result
    :raises DiscoveryException: on an unknown index_type, or if the result reports an error
    """
    if index_type not in ['article', 'journal', 'application']:
        raise DiscoveryException("There was an error executing your query for {0}. Unknown type.)".format(index_type))

    # endpoint name and model class to wrap each raw hit in, per index type
    targets = {
        'article': ('search_articles', models.Article),
        'journal': ('search_journals', models.Journal),
        'application': ('search_applications', models.Suggestion)
    }
    endpoint, klass = targets[index_type]

    raw_query, page, page_size = cls._make_query(q, page, page_size, sort, index_type, False)

    # hand the query to the query service
    res = DOAJ.queryService().search('api_query', index_type, raw_query, account, None)

    # a search error is logged in full with a reference, and only the reference
    # is put in the exception message
    failure = res.get("error")
    if failure is not None:
        magic = uuid.uuid1()
        app.logger.error("Error executing discovery query search for {i}: {x} (ref: {y})".format(i=index_type, x=res.get("error"), y=magic))
        raise DiscoveryException("There was an error executing your query (ref: {y})".format(y=magic))

    obs = []
    for raw in esprit.raw.unpack_json_result(res):
        obs.append(klass(**raw))
    return cls._make_response(endpoint, res, q, page, page_size, sort, obs)
def test_05_full_doi(self):
    """
    Test that we still detect duplicate DOIs when we have the full URI, not just the 10.
    """
    # a saved article whose DOI is given as an https URI
    stored = models.Article()
    stored_bj = stored.bibjson()
    stored_bj.title = "Example A article with a DOI"
    stored_bj.add_identifier('doi', "https://doi.org/10.doi/123")
    stored.save(blocking=True)

    # a saved article with a different DOI, which must not be picked up
    unrelated = models.Article()
    unrelated_bj = unrelated.bibjson()
    unrelated_bj.title = "Example C article with a DOI"
    unrelated_bj.add_identifier('doi', "https://doi.org/10.doi/DIFFERENT")
    unrelated.save(blocking=True)

    # the unsaved incoming article has the first DOI again, this time as an http URI
    incoming = models.Article()
    incoming_bj = incoming.bibjson()
    incoming_bj.title = "Replacement article for DOI"
    incoming_bj.add_identifier('doi', "http://doi.org/10.doi/123")

    # exactly one duplicate is expected
    svc = DOAJ.articleService()
    matches = svc.get_duplicates(incoming)
    assert len(matches) == 1

    # and asking for a single duplicate must give us the first stored article
    match = svc.get_duplicate(incoming)
    assert match is not None
    found_title = match.bibjson().title
    assert found_title == "Example A article with a DOI", found_title
def test_04_with_doi_instead(self):
    """Detect a duplicate using the DOI field."""

    # a saved article with the DOI we will look for
    stored = models.Article()
    stored_bj = stored.bibjson()
    stored_bj.title = "Example A article with a DOI"
    stored_bj.add_identifier('doi', "10.doi/123")
    stored.save(blocking=True)

    # a saved article with a different DOI, which must not be picked up
    unrelated = models.Article()
    unrelated_bj = unrelated.bibjson()
    unrelated_bj.title = "Example C article with a DOI"
    unrelated_bj.add_identifier('doi', "10.doi/DIFFERENT")
    unrelated.save(blocking=True)

    # the unsaved incoming article re-uses the first DOI
    incoming = models.Article()
    incoming_bj = incoming.bibjson()
    incoming_bj.title = "Replacement article for DOI"
    incoming_bj.add_identifier('doi', "10.doi/123")

    # exactly one duplicate is expected
    svc = DOAJ.articleService()
    matches = svc.get_duplicates(incoming)
    assert len(matches) == 1

    # and asking for a single duplicate must give us the first stored article
    match = svc.get_duplicate(incoming)
    assert match is not None
    found_title = match.bibjson().title
    assert found_title == "Example A article with a DOI", found_title
def scroll(cls, index_type, account, q, page_size, sort=None, scan=False):
    """
    Generator over the results of a scrolled discovery query.

    Because this is a generator, nothing (including the index_type check) runs until
    the first item is requested.

    :param index_type: one of 'article', 'journal' or 'application'
    :param account: the account passed on to the query service
    :param q: the query, passed to _make_query
    :param page_size: requested page size, as accepted by _make_query
    :param sort: optional sort, as accepted by _make_query
    :param scan: passed through to the query service's scroll
    :return: yields each result produced by the query service
    :raises DiscoveryException: on an unknown index_type
    """
    known_types = ['article', 'journal', 'application']
    if index_type not in known_types:
        raise DiscoveryException("There was an error executing your query for {0}. Unknown type.)".format(index_type))

    # scrolling has no use for a page number; 1 is just a placeholder for _make_query
    raw_query, _page, page_size = cls._make_query(q, 1, page_size, sort, index_type, True)

    scroller = DOAJ.queryService().scroll('api_query', index_type, raw_query, account, page_size, scan=scan)
    for record in scroller:
        yield record
def run(self):
    """
    Execute the task as specified by the background_job

    Asks the journal service for the CSV, copies every entry of the returned action
    register into the job's audit trail, then records the url the CSV is served from.

    :return:
    """
    job = self.background_job

    url, action_register = DOAJ.journalService().csv()
    for entry in action_register:
        job.add_audit_message(entry)

    summary = u"CSV generated; will be served from {y}".format(y=url)
    job.add_audit_message(summary)
def test_03_retrieve_multiple_conflict(self):
    """Check that several stored articles with one fulltext URL are reported as a merge conflict"""
    shared_url = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

    # first stored article with the shared URL
    first = models.Article()
    first_bj = first.bibjson()
    first_bj.title = "Example A article with a fulltext url"
    first_bj.add_url(shared_url, urltype="fulltext")
    first.save(blocking=True)

    # pause so that the two articles do not get the same timestamp
    time.sleep(1.01)

    # second stored article with the shared URL
    second = models.Article()
    second_bj = second.bibjson()
    second_bj.title = "Example B article with a fulltext url"
    second_bj.add_url(shared_url, urltype="fulltext")
    second.save(blocking=True)

    # a stored article with another URL, which must not be picked up
    unrelated = models.Article()
    unrelated_bj = unrelated.bibjson()
    unrelated_bj.title = "Example C article with a fulltext url"
    unrelated_bj.add_url("http://this.is/a/different/url", urltype="fulltext")
    unrelated.save(blocking=True)

    # the unsaved incoming article re-uses the shared URL
    incoming = models.Article()
    incoming_bj = incoming.bibjson()
    incoming_bj.title = "Replacement article for fulltext url"
    incoming_bj.add_url(shared_url, urltype="fulltext")

    # asking for a single duplicate must fail, as there is more than one candidate
    svc = DOAJ.articleService()
    with self.assertRaises(ArticleMergeConflict):
        svc.get_duplicate(incoming)

    # the order of the duplicates does not matter, so both sides are sorted
    # before they are compared
    expected = [first, second]
    expected.sort()

    found = svc.get_duplicates(incoming)
    assert isinstance(found, list), found
    assert found is not None
    found.sort()
    assert expected == found
def delete(cls, id, account, dry_run=False):
    """
    Delete the article with the given id on behalf of the account.

    :param id: the id of the article to delete
    :param account: the requesting account; must not be None
    :param dry_run: if True, run all the checks but do not delete anything
    :raises Api401Error: if no account was supplied
    :raises Api404Error: if no such article exists, or the account is not a legitimate owner
    """
    # authentication happens in the layer above; all we need here is an account
    if account is None:
        raise Api401Error()

    article = models.Article.pull(id)
    if article is None:
        raise Api404Error()

    # only a legitimate owner may delete; anyone else is told "not found"
    if not DOAJ.articleService().is_legitimate_owner(article, account.id):
        raise Api404Error()

    if dry_run:
        return

    # no record of the delete is kept
    article.delete()
def create(cls, data, account):
    """
    Create an article from the incoming data on behalf of the account.

    :param data: the incoming article data, as accepted by prep_article
    :param account: the requesting account; must not be None
    :return: the article model produced by prep_article
    :raises Api401Error: if no account was supplied
    :raises Api400Error: if the article service reports an ArticleMergeConflict
    :raises Api403Error: if the article service result has "fail" equal to 1
    """
    # authentication happens in the layer above; all we need here is an account
    if account is None:
        raise Api401Error()

    # turn the incoming data into an article model
    am = cls.prep_article(data)

    svc = DOAJ.articleService()
    try:
        outcome = svc.create_article(am, account, add_journal_info=True)
    except ArticleMergeConflict as e:
        raise Api400Error(e.message)

    # a failure count of exactly 1 is reported to the caller as forbidden
    failed = outcome.get("fail", 0)
    if failed == 1:
        raise Api403Error()

    return am
def test_02_different_fulltext(self):
    """Check that an article with different fulltext URLs is not considered a duplicate"""

    # a saved article with one fulltext URL
    stored = models.Article()
    stored_bj = stored.bibjson()
    stored_bj.title = "Example 2 article with a fulltext url"
    stored_bj.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")
    stored.save(blocking=True)

    # an unsaved article pointing at a completely different URL
    incoming = models.Article()
    incoming_bj = incoming.bibjson()
    incoming_bj.title = "Replacement article for fulltext url"
    incoming_bj.add_url("http://this.is/a/different/url", urltype="fulltext")

    # nothing should be reported as a duplicate
    match = DOAJ.articleService().get_duplicate(incoming)
    assert match is None
def update(cls, id, data, account):
    """
    Replace the article with the given id by one built from the incoming data.

    :param id: the id of the article to update
    :param data: the replacement article data, validated by IncomingArticleDO
    :param account: the requesting account; must not be None
    :return: the new article model, after it has been saved
    :raises Api401Error: if no account was supplied
    :raises Api404Error: if no such article exists, or the account is not a legitimate owner
    :raises Api400Error: if the data fails structural validation
    """
    # authentication happens in the layer above; all we need here is an account
    if account is None:
        raise Api401Error()

    # find the article that is to be replaced
    existing = models.Article.pull(id)
    if existing is None:
        raise Api404Error()

    # only a legitimate owner may edit; anyone else is told "not found"
    if not DOAJ.articleService().is_legitimate_owner(existing, account.id):
        raise Api404Error()

    # instantiating the data object is what performs the structural validation
    try:
        incoming = IncomingArticleDO(data)
    except dataobj.DataStructureException as e:
        raise Api400Error(e.message)

    # build the replacement, carrying across from the existing article everything
    # that lies outside the incoming data
    replacement = incoming.to_article_model(existing)

    # id, created date and subjects are not allowed to change, so they are
    # taken from the existing article
    replacement.set_id(id)
    replacement.set_created(existing.created_date)
    replacement.bibjson().set_subjects(existing.bibjson().subjects())
    replacement = cls.__handle_journal_info(replacement)

    replacement.save()
    return replacement
def run(self):
    """
    Produce the article duplicate reports for the background job.

    Job parameters read: "outdir" (where reports are written), "tmpdir" (holds the interim
    article CSV and is removed afterwards), "article_csv" (passed to _make_csv_dump) and
    "email" (whether to email an archive of the reports).

    Two CSV reports are written to outdir: one listing each newly-seen set of duplicated
    articles, and one listing articles for which duplicate discovery raised
    DuplicateArticleException.

    :return:
    """
    job = self.background_job
    params = job.params

    # Set up the files we need to run this task - a dir to place the report, and a place to write the article csv
    outdir = self.get_param(params, "outdir", "article_duplicates_" + dates.today())
    job.add_audit_message("Saving reports to " + outdir)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Location for our interim CSV file of articles
    tmpdir = self.get_param(params, "tmpdir", 'tmp_article_duplicate_report')
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    tmp_csvname = self.get_param(params, "article_csv", False)
    tmp_csvpath, total = self._make_csv_dump(tmpdir, tmp_csvname)

    global_reportfile = 'duplicate_articles_global_' + dates.today() + '.csv'
    global_reportpath = os.path.join(outdir, global_reportfile)
    noids_reportfile = 'noids_' + dates.today() + '.csv'
    noids_reportpath = os.path.join(outdir, noids_reportfile)

    # Record the sets of duplicated articles.  Frozensets in a set give constant-time
    # "have we seen this group before" checks.
    global_matches = set()
    a_count = 0

    articleService = DOAJ.articleService()

    # Both report files are managed by the with statement so they are closed even if
    # processing fails part way through
    with codecs.open(global_reportpath, "wb", "utf-8") as f, codecs.open(noids_reportpath, "wb", "utf-8") as g:
        # Initialise our reports
        global_report = UnicodeWriter(f)
        header = ["article_id", "article_created", "article_doi", "article_fulltext", "article_owner", "article_issns", "article_in_doaj", "n_matches", "match_type", "match_id", "match_created", "match_doi", "match_fulltext", "match_owner", "match_issns", "match_in_doaj", "owners_match", "titles_match", "article_title", "match_title"]
        global_report.writerow(header)

        noids_report = UnicodeWriter(g)
        header = ["article_id", "article_created", "article_owner", "article_issns", "article_in_doaj"]
        noids_report.writerow(header)

        # Read back in the article csv file we created earlier
        with codecs.open(tmp_csvpath, 'rb', 'utf-8') as t:
            article_reader = UnicodeReader(t)

            start = datetime.now()
            estimated_finish = ""
            for a in article_reader:
                # every 100 rows, refresh the finish estimate from the average time per row so far
                if a_count > 1 and a_count % 100 == 0:
                    n = datetime.now()
                    diff = (n - start).total_seconds()
                    expected_total = ((diff / a_count) * total)
                    estimated_finish = dates.format(dates.after(start, expected_total))
                a_count += 1

                # rebuild a minimal article from the CSV columns
                article = models.Article(_source={'id': a[0], 'created_date': a[1], 'bibjson': {'identifier': json.loads(a[2]), 'link': json.loads(a[3]), 'title': a[4]}, 'admin': {'in_doaj': json.loads(a[5])}})

                # Get the global duplicates
                try:
                    global_duplicates = articleService.discover_duplicates(article, owner=None, results_per_match_type=10000)
                except exceptions.DuplicateArticleException:
                    # this means the article did not have any ids that could be used for deduplication
                    owner = self._lookup_owner(article)
                    noids_report.writerow([article.id, article.created_date, owner, ','.join(article.bibjson().issns()), article.is_in_doaj()])
                    continue

                dupcount = 0
                if global_duplicates:
                    # Look up an article's owner
                    owner = self._lookup_owner(article)

                    # Deduplicate the DOI and fulltext duplicate lists
                    s = frozenset([article.id] + [d.id for d in global_duplicates.get('doi', []) + global_duplicates.get('fulltext', [])])
                    dupcount = len(s) - 1

                    # only report a group of articles the first time we come across it
                    if s not in global_matches:
                        self._write_rows_from_duplicates(article, owner, global_duplicates, global_report)
                        global_matches.add(s)

                app.logger.debug('{0}/{1} {2} {3} {4} {5}'.format(a_count, total, article.id, dupcount, len(global_matches), estimated_finish))

    job.add_audit_message('{0} articles processed for duplicates. {1} global duplicate sets found.'.format(a_count, len(global_matches)))

    # Delete the transient temporary files.
    shutil.rmtree(tmpdir)

    # Email the reports if that parameter has been set.
    send_email = self.get_param(params, "email", False)
    if send_email:
        archive_name = "article_duplicates_" + dates.today()
        email_archive(outdir, archive_name)
        job.add_audit_message("email alert sent")
    else:
        job.add_audit_message("no email alert sent")
def reject_application(self, application, account, provenance=True, note=None, manual_update=True):
    """
    Reject an application.  This will:
    * set the application status to "rejected" (if not already)
    * add the note to the application (if one is given)
    * remove the current_journal field, and move it to related_journal (if needed)
    * remove the current_application field from the related journal (if needed)
    * save the application
    * write a provenance record for the rejection (if requested)

    :param application: the application (Suggestion) to reject
    :param account: the account carrying out the rejection; needs the "reject_application" role
    :param provenance: whether to write a provenance record
    :param note: optional note to add to the application
    :param manual_update: whether to record this as a manual update on the application
    :return:
    :raises exceptions.AuthoriseException: if the account lacks the "reject_application" role
    :raises exceptions.SaveException: if saving the journal or the application returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("reject_application", [
        {"arg": application, "instance" : models.Suggestion, "allow_none" : False, "arg_name" : "application"},
        {"arg" : account, "instance" : models.Account, "allow_none" : False, "arg_name" : "account"},
        {"arg" : provenance, "instance" : bool, "allow_none" : False, "arg_name" : "provenance"},
        {"arg" : note, "instance" : basestring, "allow_none" : True, "arg_name" : "note"},
        {"arg" : manual_update, "instance" : bool, "allow_none" : False, "arg_name" : "manual_update"}
    ], exceptions.ArgumentException)

    # No isEnabledFor guard: the old one passed the string "debug" where a numeric level
    # is expected, and the message is a constant so there is no formatting to save.
    # (assumes app.logger is a standard logging.Logger, which filters by level itself)
    app.logger.debug("Entering reject_application")

    journalService = DOAJ.journalService()

    # check we're allowed to carry out this action
    if not account.has_role("reject_application"):
        raise exceptions.AuthoriseException(message="This user is not allowed to reject applications", reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "rejected"
    if application.application_status != constants.APPLICATION_STATUS_REJECTED:
        application.set_application_status(constants.APPLICATION_STATUS_REJECTED)

    # add the note to the application
    if note is not None:
        application.add_note(note)

    # retrieve the id of the current journal if there is one
    cj_id = application.current_journal
    cj = None

    # if there is a current_journal record, remove it, and record
    # it as a related journal.  This will let us come back later and know
    # which journal record this was intended as an update against if needed.
    if cj_id is not None:
        cj, _ = journalService.journal(cj_id)
        application.remove_current_journal()
        if cj is not None:
            application.set_related_journal(cj_id)
            cj.remove_current_application()

    # if there is a current journal, we will have modified it above, so save it
    if cj is not None:
        saved = cj.save()
        if saved is None:
            raise exceptions.SaveException("Save on current_journal in reject_application failed")

    # if we were asked to record this as a manual update, record that on the application
    if manual_update:
        application.set_last_manual_update()

    saved = application.save()
    if saved is None:
        raise exceptions.SaveException("Save on application in reject_application failed")

    # record a provenance record that this action took place
    if provenance:
        models.Provenance.make(account, constants.PROVENANCE_STATUS_REJECTED, application)

    app.logger.debug("Completed reject_application")
def delete_application(self, application_id, account):
    """
    Function to delete an application, and all references to it in other objects (current and related journals)

    The application and all related journals need to be locked before this process can proceed, so you may get a
    lock.Locked exception

    Every lock obtained here is released again before the method returns or raises.

    :param application_id: id of the application to delete
    :param account: the account carrying out the delete
    :return:
    :raises exceptions.NoSuchObjectException: if there is no application with this id
    :raises exceptions.SaveException: if saving a journal record returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("delete_application", [
        {"arg": application_id, "instance" : unicode, "allow_none" : False, "arg_name" : "application_id"},
        {"arg" : account, "instance" : models.Account, "allow_none" : False, "arg_name" : "account"}
    ], exceptions.ArgumentException)

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # get hold of a copy of the application.  If there isn't one, our work here is done
    # (note the application could be locked, in which case this will raise a lock.Locked exception)
    application, _ = self.application(application_id)
    if application is None:
        raise exceptions.NoSuchObjectException

    # determine if the user can edit the application
    # (presumably raises if they cannot - the return value is not inspected)
    authService.can_edit_application(account, application)

    # attempt to lock the record (this may raise a Locked exception)
    alock = lock.lock(constants.LOCK_APPLICATION, application_id, account.id)
    cjlock = None
    rjlock = None

    # From here on we hold at least one lock.  A single try/finally backs out of whatever
    # has been acquired, both when a journal turns out to be locked and on any other failure.
    try:
        # obtain the current journal, with associated lock
        current_journal = None
        if application.current_journal is not None:
            current_journal, cjlock = journalService.journal(application.current_journal, lock_journal=True, lock_account=account)

        # obtain the related journal, with associated lock
        related_journal = None
        if application.related_journal is not None:
            related_journal, rjlock = journalService.journal(application.related_journal, lock_journal=True, lock_account=account)

        # detach the application from its current journal
        if current_journal is not None:
            current_journal.remove_current_application()
            saved = current_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        # mark the relation on the related journal as "deleted", keeping any acceptance date
        if related_journal is not None:
            relation_record = related_journal.related_application_record(application_id)
            if relation_record is None:
                relation_record = {}
            related_journal.add_related_application(application_id, relation_record.get("date_accepted"), "deleted")
            saved = related_journal.save()
            if saved is None:
                raise exceptions.SaveException("Unable to save journal record")

        application.delete()

    finally:
        for held in (alock, cjlock, rjlock):
            if held is not None:
                held.delete()

    return
def update_request_for_journal(self, journal_id, account=None, lock_timeout=None):
    """
    Obtain an update request application object for the journal with the given journal_id

    An update request may either be loaded from the database, if it already exists, or created
    in-memory if it has not previously been created.

    If an account is provided, this will validate that the account holder is allowed to make
    the conversion from journal to application, if a conversion is required.

    When this request runs, the journal will be locked to the provided account if an account is
    given.  If the application is loaded from the database, this will also be locked for the account
    holder.

    :param journal_id: id of the journal to get the update request for
    :param account: optional account on whose behalf this is done
    :param lock_timeout: optional int; validated, but not otherwise read by this method
    :return: a tuple of (Application Object, Journal Lock, Application Lock); all three are None
        if the journal does not exist or is not in_doaj
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("update_request_for_journal", [
        {"arg": journal_id, "instance" : basestring, "allow_none" : False, "arg_name" : "journal_id"},
        {"arg" : account, "instance" : models.Account, "allow_none" : True, "arg_name" : "account"},
        {"arg" : lock_timeout, "instance" : int, "allow_none" : True, "arg_name" : "lock_timeout"}
    ], exceptions.ArgumentException)

    # No isEnabledFor guard: the old one passed the string "debug" where a numeric level
    # is expected, and the message is a constant so there is no formatting to save.
    # (assumes app.logger is a standard logging.Logger, which filters by level itself)
    app.logger.debug("Entering update_request_for_journal")

    journalService = DOAJ.journalService()
    authService = DOAJ.authorisationService()

    # first retrieve the journal, and return empty if there isn't one.
    # We don't attempt to obtain a lock at this stage, as we want to check that the user is authorised first
    journal_lock = None
    journal, _ = journalService.journal(journal_id)
    if journal is None:
        app.logger.info("Request for journal {x} did not find anything in the database".format(x=journal_id))
        return None, None, None

    # if the journal is not in_doaj, we won't create an update request for it
    if not journal.is_in_doaj():
        app.logger.info("Request for journal {x} found it is not in_doaj; will not create update request".format(x=journal_id))
        return None, None, None

    # retrieve the latest application attached to this journal
    application_lock = None
    application = models.Suggestion.find_latest_by_current_journal(journal_id)

    # if no such application exists, create one in memory (this will check that the user is permitted to create one)
    # at the same time, create the lock for the journal.  This will throw an AuthorisedException or a Locked exception
    # (in that order of preference) if any problems arise.
    if application is None:
        app.logger.info("No existing update request for journal {x}; creating one".format(x=journal.id))
        application = journalService.journal_2_application(journal, account=account)

        # if the journal has a latest related application, patch the new application from it
        lra_id = journal.latest_related_application_id()
        if lra_id is not None:
            lra, _ = self.application(lra_id)
            if lra is not None:
                self.patch_application(application, lra)

        # NOTE(review): lock_timeout is not passed on to lock.lock here or below - confirm intended
        if account is not None:
            journal_lock = lock.lock("journal", journal_id, account.id)

    # otherwise check that the user (if given) has the rights to edit the application
    # then lock the application and journal to the account.
    # If a lock cannot be obtained, unlock the journal and application before we return
    elif account is not None:
        try:
            authService.can_edit_application(account, application)
            application_lock = lock.lock("suggestion", application.id, account.id)
            journal_lock = lock.lock("journal", journal_id, account.id)
        except lock.Locked:
            if application_lock is not None:
                application_lock.delete()
            if journal_lock is not None:
                journal_lock.delete()
            raise
        except exceptions.AuthoriseException as e:
            # replace the message with one naming the account and journal, then let it propagate
            msg = "Account {x} is not permitted to edit the current update request on journal {y}".format(x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

        app.logger.info("Using existing application {y} as update request for journal {x}".format(y=application.id, x=journal.id))

    app.logger.debug("Completed update_request_for_journal; return application object")

    return application, journal_lock, application_lock
def journal_2_application(self, journal, account=None, keep_editors=False):
    """
    Function to convert a given journal into an application object.

    Provide the journal, and it will be converted
    in-memory to the application object (currently a Suggestion).  The new application
    WILL NOT be saved by this method.

    If an account is provided, this will validate that the account holder is
    allowed to make this conversion

    :param journal: a journal to convert
    :param account: an account doing the action - optional, if specified the application will only be created if the account is allowed to
    :param keep_editors: if True, copy the journal's editor and editor group (where set) to the application
    :return: Suggestion object
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("journal_2_application", [{
        "arg": journal,
        "instance": models.Journal,
        "allow_none": False,
        "arg_name": "journal"
    }, {
        "arg": account,
        "instance": models.Account,
        "arg_name": "account"
    }], exceptions.ArgumentException)

    # No isEnabledFor guard: the old one passed the string "debug" where a numeric level
    # is expected, and the message is a constant so there is no formatting to save.
    # (assumes app.logger is a standard logging.Logger, which filters by level itself)
    app.logger.debug("Entering journal_2_application")

    authService = DOAJ.authorisationService()

    # if an account is specified, check that it is allowed to perform this action
    if account is not None:
        try:
            authService.can_create_update_request(account, journal)  # throws exception if not allowed
        except exceptions.AuthoriseException as e:
            # replace the message with one naming the account and journal, then let it propagate
            msg = "Account {x} is not permitted to create an update request on journal {y}".format(
                x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

    # copy all the relevant information from the journal to the application
    bj = journal.bibjson()
    contacts = journal.contacts()
    notes = journal.notes
    first_contact = None

    application = models.Suggestion()
    application.set_application_status(constants.APPLICATION_STATUS_UPDATE_REQUEST)
    for c in contacts:
        application.add_contact(c.get("name"), c.get("email"))
        # remember the first contact; it is used as the suggester below
        if first_contact is None:
            first_contact = c
    application.set_current_journal(journal.id)
    if keep_editors is True:
        if journal.editor is not None:
            application.set_editor(journal.editor)
        if journal.editor_group is not None:
            application.set_editor_group(journal.editor_group)
    for n in notes:
        application.add_note(n.get("note"), n.get("date"))
    application.set_owner(journal.owner)
    application.set_seal(journal.has_seal())
    application.set_bibjson(bj)
    if first_contact is not None:
        application.set_suggester(first_contact.get("name"), first_contact.get("email"))
    application.suggested_on = dates.now()

    app.logger.debug("Completed journal_2_application; return application object")

    return application
def journal_2_application(self, journal, account=None, keep_editors=False):
    """
    Function to convert a given journal into an application object.

    Provide the journal, and it will be converted
    in-memory to the application object (currently a Suggestion).  The new application
    WILL NOT be saved by this method.

    If an account is provided, this will validate that the account holder is
    allowed to make this conversion

    :param journal: a journal to convert
    :param account: an account doing the action - optional, if specified the application will only be created if the account is allowed to
    :param keep_editors: if True, copy the journal's editor and editor group (where set) to the application
    :return: Suggestion object
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    argvalidate("journal_2_application", [
        {"arg": journal, "instance" : models.Journal, "allow_none" : False, "arg_name" : "journal"},
        {"arg" : account, "instance" : models.Account, "arg_name" : "account"}
    ], exceptions.ArgumentException)

    # No isEnabledFor guard: the old one passed the string "debug" where a numeric level
    # is expected, and the message is a constant so there is no formatting to save.
    # (assumes app.logger is a standard logging.Logger, which filters by level itself)
    app.logger.debug("Entering journal_2_application")

    authService = DOAJ.authorisationService()

    # if an account is specified, check that it is allowed to perform this action
    if account is not None:
        try:
            authService.can_create_update_request(account, journal)  # throws exception if not allowed
        except exceptions.AuthoriseException as e:
            # replace the message with one naming the account and journal, then let it propagate
            msg = "Account {x} is not permitted to create an update request on journal {y}".format(x=account.id, y=journal.id)
            app.logger.info(msg)
            e.message = msg
            raise

    # copy all the relevant information from the journal to the application
    bj = journal.bibjson()
    contacts = journal.contacts()
    notes = journal.notes
    first_contact = None

    application = models.Suggestion()
    application.set_application_status(constants.APPLICATION_STATUS_UPDATE_REQUEST)
    for c in contacts:
        application.add_contact(c.get("name"), c.get("email"))
        # remember the first contact; it is used as the suggester below
        if first_contact is None:
            first_contact = c
    application.set_current_journal(journal.id)
    if keep_editors is True:
        if journal.editor is not None:
            application.set_editor(journal.editor)
        if journal.editor_group is not None:
            application.set_editor_group(journal.editor_group)
    for n in notes:
        application.add_note(n.get("note"), n.get("date"))
    application.set_owner(journal.owner)
    application.set_seal(journal.has_seal())
    application.set_bibjson(bj)
    if first_contact is not None:
        application.set_suggester(first_contact.get("name"), first_contact.get("email"))
    application.suggested_on = dates.now()

    app.logger.debug("Completed journal_2_application; return application object")

    return application
def reject_application(self, application, account, provenance=True, note=None, manual_update=True):
    """
    Reject an application.  This will:
    * set the application status to "rejected" (if not already)
    * add the supplied note to the application (if one is given)
    * remove the current_journal field, and move it to related_journal (if needed)
    * remove the current_application field from the related journal (if needed)
    * save the journal (if it was modified) and then the application
    * write a provenance record for the rejection (if requested)

    :param application: the models.Suggestion to reject
    :param account: the models.Account carrying out the rejection; must hold the
        "reject_application" role
    :param provenance: whether to write a provenance record for the rejection
    :param note: optional text to add to the application as a note
    :param manual_update: whether to record this as a manual update on the application
    :return: None
    :raises exceptions.AuthoriseException: if the account lacks the required role
    :raises exceptions.SaveException: if saving the journal or the application returns None
    """
    # first validate the incoming arguments to ensure that we've got the right thing
    # NOTE(review): `basestring` is a Python 2 name - confirm this module targets Python 2
    # (or aliases the name) before running it on Python 3.
    argvalidate("reject_application", [
        {"arg": application, "instance": models.Suggestion, "allow_none": False, "arg_name": "application"},
        {"arg": account, "instance": models.Account, "allow_none": False, "arg_name": "account"},
        {"arg": provenance, "instance": bool, "allow_none": False, "arg_name": "provenance"},
        {"arg": note, "instance": basestring, "allow_none": True, "arg_name": "note"},
        {"arg": manual_update, "instance": bool, "allow_none": False, "arg_name": "manual_update"}
    ], exceptions.ArgumentException)

    # Logger.debug() performs its own level check, so no isEnabledFor() guard is
    # needed.  The previous guard passed the string "debug" where a numeric level
    # is required, which cannot be compared to the logger's level on Python 3.
    app.logger.debug("Entering reject_application")

    journalService = DOAJ.journalService()

    # check we're allowed to carry out this action
    if not account.has_role("reject_application"):
        raise exceptions.AuthoriseException(message="This user is not allowed to reject applications",
                                            reason=exceptions.AuthoriseException.WRONG_ROLE)

    # ensure the application status is "rejected"
    if application.application_status != constants.APPLICATION_STATUS_REJECTED:
        application.set_application_status(constants.APPLICATION_STATUS_REJECTED)

    # add the note to the application
    if note is not None:
        application.add_note(note)

    # retrieve the id of the current journal if there is one
    cj_id = application.current_journal
    cj = None

    # if there is a current_journal record, remove it, and record
    # it as a related journal.  This will let us come back later and know
    # which journal record this was intended as an update against if needed.
    if cj_id is not None:
        # the journal service returns a pair; only the journal itself is used here
        cj, _ = journalService.journal(cj_id)
        application.remove_current_journal()
        if cj is not None:
            application.set_related_journal(cj_id)
            cj.remove_current_application()

    # if there is a current journal, we will have modified it above, so save it
    if cj is not None:
        saved = cj.save()
        if saved is None:
            raise exceptions.SaveException("Save on current_journal in reject_application failed")

    # if we were asked to record this as a manual update, record that on the application
    if manual_update:
        application.set_last_manual_update()

    saved = application.save()
    if saved is None:
        raise exceptions.SaveException("Save on application in reject_application failed")

    # record a provenance record that this action took place
    if provenance:
        models.Provenance.make(account, constants.PROVENANCE_STATUS_REJECTED, application)

    app.logger.debug("Completed reject_application")
def application_quick_reject(application_id):
    """
    Reject an application using the reasons supplied in the request, notify the
    publisher by email, and send the user back to the application's edit page.

    The rejection note is built from the "reject_reason" and
    "additional_reject_information" request values; at least one must be non-empty.

    Aborts with:
    * 400 if no rejection reason was supplied
    * 409 if the application is locked
    * 404 if no application was returned for the given id
    * 400 if the application is an update request (it has a current journal)

    :param application_id: id of the application to reject
    :return: a redirect to the '.suggestion_page' view for this application
    """
    # extract the note information from the request
    canned_reason = request.values.get("reject_reason", "")
    additional_info = request.values.get("additional_reject_information", "")
    reasons = [r for r in (canned_reason, additional_info) if r != ""]
    if not reasons:
        abort(400)
    reason = " - ".join(reasons)
    note = Messages.REJECT_NOTE_WRAPPER.format(editor=current_user.id, note=reason)

    applicationService = DOAJ.applicationService()
    account = current_user._get_current_object()

    # retrieve the application and an edit lock on that application
    # NOTE(review): the returned lock is neither used nor released here - confirm
    # that it is released or expires elsewhere.
    application = None
    try:
        application, alock = applicationService.application(
            application_id, lock_application=True, lock_account=account)
    except lock.Locked:
        abort(409)

    # without this guard a missing application surfaces as an AttributeError below
    if application is None:
        abort(404)

    # determine if this was a new application or an update request;
    # update requests cannot be quick-rejected
    update_request = application.current_journal is not None
    if update_request:
        abort(400)

    # reject the application
    applicationService.reject_application(application, account, note=note)

    # send the notification email to the user
    sent = False
    send_report = []
    try:
        send_report = emails.send_publisher_reject_email(
            application, note=reason, update_request=update_request,
            send_to_owner=True, send_to_suggester=True)
        sent = True
    except app_email.EmailException:
        # best effort: the rejection itself has succeeded; the failure is
        # reported to the user through the flash message below
        pass

    # sort out some flash messages for the user
    flash(note, "success")

    if not sent:
        # When sending fails there is no send report to iterate over, so the
        # failure has to be reported here rather than inside the loop below
        # (where it could never be reached).
        flash(Messages.NOT_SENT_REJECTED_APPLICATION_EMAILS.format(user=application.owner), "error")

    for instructions in send_report:
        msg = ""
        if instructions["type"] == "owner":
            msg = Messages.SENT_REJECTED_APPLICATION_EMAIL_TO_OWNER.format(
                user=application.owner, email=instructions["email"], name=instructions["name"])
        elif instructions["type"] == "suggester":
            msg = Messages.SENT_REJECTED_APPLICATION_EMAIL_TO_SUGGESTER.format(
                email=instructions["email"], name=instructions["name"])
        flash(msg, "success")

    # redirect the user back to the edit page
    return redirect(url_for('.suggestion_page', suggestion_id=application_id))
def run(self):
    """
    Produce the article duplicate reports for this background job.

    Steps:
    * create the output directory ("outdir" param) and the temporary directory
      ("tmpdir" param) if they do not exist
    * dump the articles to an interim CSV via ``self._make_csv_dump``
    * read that CSV back and ask the article service for each article's duplicates
    * write one CSV of duplicate matches and one CSV of articles that had no
      identifiers usable for deduplication
    * delete the temporary directory
    * email the reports if the "email" param is set

    Each distinct set of mutually duplicated article ids is written to the
    global report only once, for the first article in which it is encountered.

    :return: None
    """
    job = self.background_job
    params = job.params

    # take the date once so that every file name produced by this run agrees
    today = dates.today()

    # Set up the files we need to run this task - a dir to place the report, and a place to write the article csv
    outdir = self.get_param(params, "outdir", "article_duplicates_" + today)
    job.add_audit_message("Saving reports to " + outdir)
    os.makedirs(outdir, exist_ok=True)

    # Location for our interim CSV file of articles
    tmpdir = self.get_param(params, "tmpdir", 'tmp_article_duplicate_report')
    os.makedirs(tmpdir, exist_ok=True)

    tmp_csvname = self.get_param(params, "article_csv", False)
    tmp_csvpath, total = self._make_csv_dump(tmpdir, tmp_csvname)

    global_reportpath = os.path.join(outdir, 'duplicate_articles_global_' + today + '.csv')
    noids_reportpath = os.path.join(outdir, 'noids_' + today + '.csv')

    # Distinct sets of duplicated article ids seen so far.  Stored as frozensets
    # in a set so the "already reported?" test does not scan every earlier match.
    global_matches = set()
    a_count = 0

    articleService = DOAJ.articleService()

    # Context managers guarantee the report files are closed even if processing
    # fails part way through; newline="" is what the csv module asks for on
    # files it reads or writes.
    with open(global_reportpath, "w", encoding="utf-8", newline="") as f, \
            open(noids_reportpath, "w", encoding="utf-8", newline="") as g:

        # Initialise our reports
        global_report = csv.writer(f)
        global_report.writerow([
            "article_id", "article_created", "article_doi", "article_fulltext", "article_owner",
            "article_issns", "article_in_doaj", "n_matches", "match_type", "match_id",
            "match_created", "match_doi", "match_fulltext", "match_owner", "match_issns",
            "match_in_doaj", "owners_match", "titles_match", "article_title", "match_title"
        ])

        noids_report = csv.writer(g)
        noids_report.writerow([
            "article_id", "article_created", "article_owner", "article_issns", "article_in_doaj"
        ])

        # Read back in the article csv file we created earlier
        with open(tmp_csvpath, 'r', encoding='utf-8', newline="") as t:
            article_reader = csv.reader(t)

            start = datetime.now()
            estimated_finish = ""
            for a in article_reader:
                # every 100 rows, refresh the estimated finish time from the average rate so far
                if a_count > 1 and a_count % 100 == 0:
                    elapsed = (datetime.now() - start).total_seconds()
                    expected_total = (elapsed / a_count) * total
                    estimated_finish = dates.format(dates.after(start, expected_total))

                a_count += 1

                # rebuild a minimal article from the row; columns 2, 3 and 5 hold JSON
                article = models.Article(_source={
                    'id': a[0],
                    'created_date': a[1],
                    'bibjson': {
                        'identifier': json.loads(a[2]),
                        'link': json.loads(a[3]),
                        'title': a[4]
                    },
                    'admin': {
                        'in_doaj': json.loads(a[5])
                    }
                })

                # Get the global duplicates
                try:
                    global_duplicates = articleService.discover_duplicates(
                        article, results_per_match_type=10000, include_article=False)
                except exceptions.DuplicateArticleException:
                    # this means the article did not have any ids that could be used for deduplication
                    owner = self._lookup_owner(article)
                    noids_report.writerow([
                        article.id, article.created_date, owner,
                        ','.join(article.bibjson().issns()), article.is_in_doaj()
                    ])
                    continue

                dupcount = 0
                if global_duplicates:
                    # Look up an article's owner
                    owner = self._lookup_owner(article)

                    # Deduplicate the DOI and fulltext duplicate lists
                    matches = global_duplicates.get('doi', []) + global_duplicates.get('fulltext', [])
                    id_set = frozenset([article.id] + [d.id for d in matches])

                    # the article's own id is in the set, so it is not counted as a duplicate
                    dupcount = len(id_set) - 1
                    if id_set not in global_matches:
                        self._write_rows_from_duplicates(article, owner, global_duplicates, global_report)
                        global_matches.add(id_set)

                app.logger.debug('{0}/{1} {2} {3} {4} {5}'.format(
                    a_count, total, article.id, dupcount, len(global_matches), estimated_finish))

    job.add_audit_message(
        '{0} articles processed for duplicates. {1} global duplicate sets found.'.format(
            a_count, len(global_matches)))

    # Delete the transient temporary files.
    shutil.rmtree(tmpdir)

    # Email the reports if that parameter has been set.
    send_email = self.get_param(params, "email", False)
    if send_email:
        archive_name = "article_duplicates_" + today
        email_archive(outdir, archive_name)
        job.add_audit_message("email alert sent")
    else:
        job.add_audit_message("no email alert sent")