def get_journal(cls, specs):
    journals = []
    for spec in specs:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**source)
        bj = j.bibjson()
        bj.title = spec.get("title", "Journal Title")
        bj.remove_identifiers()
        if "pissn" in spec:
            bj.add_identifier(bj.P_ISSN, spec.get("pissn"))
        if "eissn" in spec:
            bj.add_identifier(bj.E_ISSN, spec.get("eissn"))
        spec["instance"] = j
        journals.append(spec)

    def mock(self):
        bibjson = self.bibjson()

        # first, get the ISSNs associated with the record
        pissns = bibjson.get_identifiers(bibjson.P_ISSN)
        eissns = bibjson.get_identifiers(bibjson.E_ISSN)

        for j in journals:
            if j["pissn"] in pissns and j["eissn"] in eissns:
                return j["instance"]

    return mock
def migrate_journal(data):
    if "bibjson" not in data:
        return Journal(**data)
    ap = data.get("bibjson").get("archiving_policy")
    if ap is None:
        return Journal(**data)
    data["bibjson"]["archiving_policy"] = _reformat_data(ap)
    return Journal(**data)
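A minimal usage sketch for the migration helper above. The input record and the shape of the old-style archiving_policy value are hypothetical, assumed only for illustration; `_reformat_data` and `Journal` come from the source:

# hypothetical record carrying an old-format archiving_policy (shape assumed)
data = {
    "id": "abcdef",
    "bibjson": {
        "title": "Example Journal",
        "archiving_policy": {"policy": ["LOCKSS"], "url": "http://example.com/archiving"}
    }
}
journal = migrate_journal(data)  # returns a Journal whose archiving_policy has been passed through _reformat_data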
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow(["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"])

        out_missing = UnicodeWriter(g)
        out_missing.writerow(["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            print counter, application.id

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                pstamp = latest_prov.created_timestamp
                td = pstamp - lustamp
                diff = td.total_seconds()
                if diff > THRESHOLD:
                    out_later.writerow([application.id, application.last_updated, created, diff])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                missing = False

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(application.bibjson().issns())
                if len(matching_journals) == 0:
                    # have another go, find by title
                    matching_journals = Journal.find_by_title(application.bibjson().title)

                # if there are no matching journals, it is missing
                if len(matching_journals) == 0:
                    missing = True
                else:
                    # if there are matching journals, find out if any of them are in the doaj.
                    # If none, then the journal is still missing
                    those_in_doaj = len([j for j in matching_journals if j.is_in_doaj()])
                    if those_in_doaj == 0:
                        missing = True

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([application.id, application.last_manual_update, created,
                                          " ".join(application.bibjson().issns()), application.bibjson().title])

    print "processed", counter, "suggestions"
def setUp(self):
    super(TestCreateOrUpdateArticle, self).setUp()

    self.publisher = Account()
    self.publisher.add_role("publisher")
    self.publisher.save(blocking=True)

    self.admin = Account()
    self.admin.add_role("admin")
    self.admin.save(blocking=True)

    sources = JournalFixtureFactory.make_many_journal_sources(2, True)
    self.journal1 = Journal(**sources[0])
    self.journal1.set_owner(self.publisher.id)
    jbib1 = self.journal1.bibjson()
    jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
    jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
    self.journal1.save(blocking=True)
    self.publisher.add_journal(self.journal1)

    self.journal2 = Journal(**sources[1])
    jbib2 = self.journal2.bibjson()
    jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
    jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
    self.journal2.save(blocking=True)

    self.article10 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-10",
        fulltext="https://www.article10.com"))
    self.article10.set_id("articleid10")
    self.article10.save(blocking=True)

    self.article11 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-11",
        fulltext="https://www.article11.com"))
    self.article11.set_id("articleid11")
    self.article11.save(blocking=True)

    self.article2 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1234-5678", eissn="9876-5432", doi="10.0000/article-2",
        fulltext="https://www.article2.com"))
    self.article2.set_id("articleid2")
    self.article2.save(blocking=True)
def get_owner(self):
    b = self.bibjson()
    article_issns = b.get_identifiers(b.P_ISSN)
    article_issns += b.get_identifiers(b.E_ISSN)

    owners = []
    seen_journal_issns = {}
    for issn in article_issns:
        journals = Journal.find_by_issn(issn)
        if journals is not None and len(journals) > 0:
            for j in journals:
                owners.append(j.owner)
                if j.owner not in seen_journal_issns:
                    seen_journal_issns[j.owner] = []
                seen_journal_issns[j.owner] += j.bibjson().issns()

    # deduplicate the list of owners
    owners = list(set(owners))

    # no owner means we can't confirm
    if len(owners) == 0:
        raise NoValidOwnerException

    # multiple owners means ownership of this article is confused
    if len(owners) > 1:
        raise NoValidOwnerException

    return owners[0]
def get_publisher(acc):
    q = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.owner.exact": acc.id}},
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        },
        "size": 0,
        "facets": {
            "publishers": {
                "terms": {
                    "field": "bibjson.publisher.exact",
                    "size": 1000
                }
            }
        }
    }
    es = Journal.query(q=q)
    pubs = [term.get("term") for term in es.get("facets", {}).get("publishers", {}).get("terms", [])]
    if len(pubs) == 0:
        return None
    return ", ".join(pubs)
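A usage sketch for the facet query above, assuming a populated index and that Account supports pull() like Journal does elsewhere in this listing; the account id is a placeholder:

acc = Account.pull("publisher-account-id")  # hypothetical account id
publishers = get_publisher(acc)  # e.g. "Publisher 1, Publisher 2", or None if the account has no in-DOAJ journals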
def _sync_owner_to_journal(self):
    if self.current_journal is None:
        return
    from portality.models import Journal
    cj = Journal.pull(self.current_journal)
    if cj is not None and cj.owner != self.owner:
        cj.set_owner(self.owner)
        cj.save(sync_owner=False)
def traverse_journals():
    """ Check dataset lookups in all journals in DOAJ """
    j_report = {'country': [], 'currency': [], 'language': []}

    for j in Journal.all_in_doaj():
        check_invalid_datasets(j, j_report)

    return j_report
def test_has_permissions(self):
    journal_source = JournalFixtureFactory.make_journal_source()
    journal1 = Journal(**journal_source)

    publisher_owner_src = AccountFixtureFactory.make_publisher_source()
    publisher_owner = Account(**publisher_owner_src)
    publisher_stranged_src = AccountFixtureFactory.make_publisher_source()
    publisher_stranged = Account(**publisher_stranged_src)
    admin_src = AccountFixtureFactory.make_managing_editor_source()
    admin = Account(**admin_src)

    journal1.set_owner(publisher_owner)
    journal1.save(blocking=True)

    eissn = journal1.bibjson().get_one_identifier("eissn")
    pissn = journal1.bibjson().get_one_identifier("pissn")

    art_source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn)
    article = Article(**art_source)

    assert self.svc.has_permissions(publisher_stranged, article, False)
    assert self.svc.has_permissions(publisher_owner, article, True)
    assert self.svc.has_permissions(admin, article, True)

    failed_result = self.svc.has_permissions(publisher_stranged, article, True)
    assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0,
                             'shared': [], 'unowned': [pissn, eissn],
                             'unmatched': []}, "received: {}".format(failed_result)
def load_journal_cases():
    account = Account(**AccountFixtureFactory.make_publisher_source())
    account.set_id(account.makeid())

    journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_id(journal.makeid())

    wrong_id = uuid.uuid4()

    return [
        param("j_id_acc_lock", journal, journal.id, account, True, raises=lock.Locked),
        param("j_id_acc_nolock", journal, journal.id, account, False),
        param("j_id_noacc_nolock", journal, journal.id, None, False),
        param("j_noid_noacc_nolock", journal, None, None, False, raises=exceptions.ArgumentException),
        param("j_wid_noacc_nolock", journal, wrong_id, None, False),
        param("noj_id_noacc_nolock", None, journal.id, None, False),
        param("noj_noid_noacc_nolock", None, None, None, False, raises=exceptions.ArgumentException)
    ]
def _get_journal_id_from_issn(issn):
    issn = _normalise_issn(issn)
    journals = Journal.find_by_issn(issn)
    if len(journals) > 1:
        print "WARN: issn", issn, "maps to multiple journals:", ", ".join([j.id for j in journals])
    if len(journals) == 0:
        print "WARN: issn", issn, "does not map to any journals"
    if len(journals) > 0:
        return journals[0].id
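A short example of resolving a batch of ISSNs to journal ids with the helper above; the ISSNs are placeholders assumed for illustration:

issns = ["1234-5678", "9876-5432"]  # hypothetical ISSNs
journal_ids = []
for issn in issns:
    jid = _get_journal_id_from_issn(issn)  # returns None when no journal matches
    if jid is not None:
        journal_ids.append(jid)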
def get_result_url(self):
    """
    Get the URL for this OpenURLRequest's referent.
    :return: The url as a string, or None if not found.
    """
    try:
        results = self.query_es()
    except ValueError:
        return None

    if results is None:
        return None

    if results.get('hits', {}).get('total', 0) == 0:
        # no results found for query, retry
        results = self.fallthrough_retry()
        if results is None or results.get('hits', {}).get('total', 0) == 0:
            # this time we've definitely failed
            return None

    if results.get('hits', {}).get('hits', [{}])[0].get('_type') == 'journal':
        # construct a journal object around the result
        journal = Journal(**results['hits']['hits'][0])

        # the continuation is a first-class journal object, so if we have a journal we have the
        # right continuation (assuming that the user gave us specific enough information)
        ident = journal.id

        # if the request has a volume parameter, query for presence of an article with that volume
        if self.volume:
            vol_iss_results = self.query_for_vol(journal)
            if vol_iss_results is None:
                # we were asked for a vol/issue, but weren't given the correct information to get it
                return None
            elif vol_iss_results['hits']['total'] > 0:
                # construct the toc url using the ident, plus volume and issue
                jtoc_url = url_for("doaj.toc", identifier=ident, volume=self.volume, issue=self.issue)
            else:
                # if no results, the DOAJ does not contain the vol/issue being searched (show openurl 404)
                jtoc_url = None
        else:
            # if no volume parameter, construct the toc url using the ident only
            jtoc_url = url_for("doaj.toc", identifier=ident)
        return jtoc_url
    elif results.get('hits', {}).get('hits', [{}])[0].get('_type') == 'article':
        return url_for("doaj.article_page", identifier=results['hits']['hits'][0]['_id'])
def publishers_with_journals():
    """ Get accounts for all publishers with journals in the DOAJ """
    for acc in esprit.tasks.scroll(conn, 'account', q=publisher_query):
        account = Account(**acc)
        journal_ids = account.journal
        if journal_ids is not None:
            for j in journal_ids:
                journal = Journal.pull(j)
                if journal is not None and journal.is_in_doaj():
                    yield account
                    break
def suggestion2journal(suggestion):
    journal_data = deepcopy(suggestion.data)
    del journal_data['suggestion']
    del journal_data['index']
    del journal_data['admin']['application_status']
    del journal_data['id']
    del journal_data['created_date']
    del journal_data['last_updated']
    journal_data['bibjson']['active'] = True
    new_j = Journal(**journal_data)
    return new_j
def make_journal(self):
    # first make a raw copy of the content into a journal
    journal_data = deepcopy(self.data)
    if "suggestion" in journal_data:
        del journal_data['suggestion']
    if "index" in journal_data:
        del journal_data['index']
    if "admin" in journal_data and "application_status" in journal_data["admin"]:
        del journal_data['admin']['application_status']
    if "id" in journal_data:
        del journal_data['id']
    if "created_date" in journal_data:
        del journal_data['created_date']
    if "last_updated" in journal_data:
        del journal_data['last_updated']
    if "bibjson" not in journal_data:
        journal_data["bibjson"] = {}
    journal_data['bibjson']['active'] = True

    new_j = Journal(**journal_data)

    # now deal with the fact that this could be a replacement of an existing journal
    if self.current_journal is not None:
        cj = Journal.pull(self.current_journal)

        # carry the id and the created date
        new_j.set_id(self.current_journal)
        new_j.set_created(cj.created_date)

        # set a reapplication date
        new_j.set_last_reapplication()

        # carry any continuations
        hist = cj.get_history_raw()
        if hist is not None and len(hist) > 0:
            new_j.set_history(cj.get_history_raw())

        # remove the reference to the current_journal
        del new_j.data["admin"]["current_journal"]

    return new_j
def delete_selected(cls, query=None, owner=None, snapshot=True):
    if owner is not None:
        from portality.models import Journal
        issns = Journal.issns_by_owner(owner)
        q = ArticleQuery(issns=issns)
        query = q.query()

    if snapshot:
        articles = cls.iterate(query, page_size=1000)
        for article in articles:
            article.snapshot()

    return cls.delete_by_query(query)
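A minimal sketch of calling the classmethod above to remove all of one publisher's articles, assuming it lives on the Article model (as the ArticleQuery usage suggests); the owner id is a placeholder:

# snapshots each article, then deletes everything owned by this account's journals
Article.delete_selected(owner="publisher-account-id", snapshot=True)  # hypothetical account id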
def list_records(self, from_date=None, until_date=None, oai_set=None, list_size=None, start_after=None):
    total, results = super(OAIPMHJournal, self).list_records(
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        list_size=list_size,
        start_after=start_after
    )
    return total, [Journal(**r) for r in results]
def setUp(self):
    super(TestAdminEditMetadata, self).setUp()

    admin_account = Account.make_account(username="******", name="Admin",
                                         email="*****@*****.**", roles=["admin"])
    admin_account.set_password('password123')
    admin_account.save()

    publisher_account = Account.make_account(username="******", name="Publisher",
                                             email="*****@*****.**", roles=["publisher"])
    publisher_account.set_password('password456')
    publisher_account.save(blocking=True)

    self.j = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    self.j.save(blocking=True)
    self.a = Article(**ArticleFixtureFactory.make_article_source(in_doaj=True))
    self.a.save(blocking=True)
def find_by_issn(cls, issns, owners):
    journals = []
    seen_issns = []
    for owner in owners:
        for eissn, pissn in issns:
            if eissn not in seen_issns and eissn is not None:
                seen_issns.append(eissn)
            if pissn not in seen_issns and pissn is not None:
                seen_issns.append(pissn)

            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner)
            journal.bibjson().remove_identifiers("eissn")
            journal.bibjson().remove_identifiers("pissn")
            if eissn is not None:
                journal.bibjson().add_identifier("eissn", eissn)
            if pissn is not None:
                journal.bibjson().add_identifier("pissn", pissn)
            journals.append(journal)

    @classmethod
    def mock(cls, issns, in_doaj=None, max=10):
        if not isinstance(issns, list):
            issns = [issns]
        for issn in issns:
            if issn in seen_issns:
                return journals
        return []

    return mock
def load_j2a_cases():
    journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    account_source = AccountFixtureFactory.make_publisher_source()

    owner_account = Account(**deepcopy(account_source))
    owner_account.set_id(journal.owner)

    non_owner_publisher = Account(**deepcopy(account_source))

    non_publisher = Account(**deepcopy(account_source))
    non_publisher.remove_role("publisher")

    admin = Account(**deepcopy(account_source))
    admin.add_role("admin")

    return [
        param("no_journal_no_account", None, None, raises=exceptions.ArgumentException),
        param("no_journal_with_account", None, owner_account, raises=exceptions.ArgumentException),
        param("journal_no_account", journal, None, comparator=application_matches),
        param("journal_matching_account", journal, owner_account, comparator=application_matches),
        param("journal_unmatched_account", journal, non_owner_publisher, raises=exceptions.AuthoriseException),
        param("journal_non_publisher_account", journal, non_publisher, raises=exceptions.AuthoriseException),
        param("journal_admin_account", journal, admin, comparator=application_matches)
    ]
def query_es(self):
    """
    Query Elasticsearch for a set of matches for this request.
    :return: The results of a query through the dao, a JSON object.
    """

    # copy the template, which will be populated with terms
    populated_query = deepcopy(TERMS_SEARCH)

    # get all of the attributes with values set
    set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS[:-1] if getattr(self, x)]

    # if we don't have a genre, guess journal  FIXME: is it correct to assume journal?
    if not self.genre:
        self.genre = SUPPORTED_GENRES[0]  # TODO: we may want to handle 404 instead

    # set i to use either our mapping for journals or articles
    i = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

    # add the attributes to the query
    for (k, v) in set_attributes:
        es_term = OPENURL_TO_ES[k][i]
        if es_term is None:
            continue
        else:
            term = {"term": {es_term: v}}
        populated_query["query"]["bool"]["must"].append(term)

    # avoid doing an empty query
    if len(populated_query["query"]["bool"]["must"]) == 0:
        app.logger.debug("No valid search terms in OpenURL object")
        return None

    # return the results of the query
    if i == 0:
        app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
        return Journal.query(q=populated_query)
    elif i == 1:
        app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
        return Article.query(q=populated_query)
def lookup_subject_categories(issns):
    """ By ISSN, get the subject classification of a journal """
    subjects_column = []

    for i in issns:
        il = [s.strip() for s in i.split(',')]
        j = Journal.find_by_issn(il, in_doaj=False)
        if len(j) == 0:
            subjects_column.append('Error: not found')
        elif len(j) == 1:
            subj = j[0].bibjson().subjects()
            subjects_column.append(', '.join(
                [f"{s['scheme']}:{s['code']} - {s['term']}" for s in subj]))
        else:
            subjects_column.append('Error: multiple records found for that ISSN')

    return subjects_column
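An example call for the lookup above, assuming a populated index; the ISSNs are placeholders, and note that each input string may carry several comma-separated ISSNs:

issns = ["1234-5678", "0123-4567, 7654-3210"]  # hypothetical ISSNs
subjects = lookup_subject_categories(issns)  # one entry per input, e.g. "LCC:Q1 - Science" or an error marker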
def load_issn_journal_map():
    # we need to go over every journal in the index
    for j in Journal.iterall():
        obj = {}
        jbib = j.bibjson()

        # store only the data we want for the article migration
        obj["subjects"] = jbib.subjects()
        obj["title"] = jbib.title
        obj["license"] = jbib.get_license()
        obj["language"] = jbib.language
        obj["country"] = jbib.country
        obj["in_doaj"] = j.is_in_doaj()

        # get the issns that map to this journal (or any previous version of it)
        issns = j.data.get("index", {}).get("issn", [])

        # register pointers to the object for each issn
        for issn in issns:
            issnmap[issn] = obj
def get_journal(self):
    """
    Get this article's associated journal
    :return: A Journal, or None if this is an orphan article
    """
    bibjson = self.bibjson()

    # first, get the ISSNs associated with the record
    pissns = bibjson.get_identifiers(bibjson.P_ISSN)
    eissns = bibjson.get_identifiers(bibjson.E_ISSN)
    allissns = list(set(pissns + eissns))

    # find a matching journal record from the index
    journal = None
    for issn in allissns:
        journals = Journal.find_by_issn(issn)
        if len(journals) > 0:
            # there should only ever be one, so take the first one
            journal = journals[0]
            break

    return journal
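A usage sketch tying this lookup to the metadata copy in add_journal_metadata further down this listing; the article id is a placeholder:

article = Article.pull("article-id")  # hypothetical article id
journal = article.get_journal()
if journal is not None:
    # pass the journal in to bypass the slow locating step
    article.add_journal_metadata(j=journal)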
def get_associated_journals(self):
    # find all the matching journal records in the index
    allissns = self.bibjson().issns()
    return Journal.find_by_issn(allissns)
def form2obj(form, existing_journal):
    journal = Journal()
    bibjson = journal.bibjson()

    # The if statements that wrap practically every field are there due to this
    # form being used to edit old journals which don't necessarily have most of
    # this info.
    # It also allows admins to delete the contents of any field if they wish,
    # by ticking the "Allow incomplete form" checkbox and deleting the contents
    # of that field. The if condition(s) will then *not* add the relevant field to
    # the new journal object being constructed.
    # add_url in the journal model has a safeguard against empty URLs.

    if form.title.data:
        bibjson.title = form.title.data
    bibjson.add_url(form.url.data, urltype='homepage')
    if form.alternative_title.data:
        bibjson.alternative_title = form.alternative_title.data
    if form.pissn.data:
        bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
    if form.eissn.data:
        bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
    if form.publisher.data:
        bibjson.publisher = form.publisher.data
    if form.society_institution.data:
        bibjson.institution = form.society_institution.data
    if form.platform.data:
        bibjson.provider = form.platform.data
    if form.contact_name.data or form.contact_email.data:
        journal.add_contact(form.contact_name.data, form.contact_email.data)
    if form.country.data:
        bibjson.country = form.country.data

    if forms.interpret_special(form.processing_charges.data):
        bibjson.set_apc(form.processing_charges_currency.data, form.processing_charges_amount.data)

    if forms.interpret_special(form.submission_charges.data):
        bibjson.set_submission_charges(form.submission_charges_currency.data, form.submission_charges_amount.data)

    if forms.interpret_special(form.waiver_policy.data):
        bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

    # checkboxes
    if forms.interpret_special(form.digital_archiving_policy.data) or form.digital_archiving_policy_url.data:
        archiving_policies = forms.interpret_special(form.digital_archiving_policy.data)
        archiving_policies = forms.interpret_other(archiving_policies,
                                                   form.digital_archiving_policy_other.data,
                                                   store_other_label=True)
        archiving_policies = forms.interpret_other(archiving_policies,
                                                   form.digital_archiving_policy_library.data,
                                                   forms.digital_archiving_policy_specific_library_value,
                                                   store_other_label=True)
        bibjson.set_archiving_policy(archiving_policies, form.digital_archiving_policy_url.data)

    if form.crawl_permission.data and form.crawl_permission.data != 'None':
        bibjson.allows_fulltext_indexing = forms.interpret_special(form.crawl_permission.data)  # just binary

    # checkboxes
    article_ids = forms.interpret_special(form.article_identifiers.data)
    article_ids = forms.interpret_other(article_ids, form.article_identifiers_other.data)
    if article_ids:
        bibjson.persistent_identifier_scheme = article_ids

    if (form.download_statistics.data and form.download_statistics.data != 'None') or form.download_statistics_url.data:
        bibjson.set_article_statistics(form.download_statistics_url.data,
                                       forms.interpret_special(form.download_statistics.data))

    if form.first_fulltext_oa_year.data:
        bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

    # checkboxes
    fulltext_format = forms.interpret_other(form.fulltext_format.data, form.fulltext_format_other.data)
    if fulltext_format:
        bibjson.format = fulltext_format

    if form.keywords.data:
        bibjson.set_keywords(form.keywords.data)  # tag list field

    if form.languages.data:
        bibjson.set_language(form.languages.data)  # select multiple field - gives a list back

    bibjson.add_url(form.editorial_board_url.data, urltype='editorial_board')

    if form.review_process.data or form.review_process_url.data:
        bibjson.set_editorial_review(form.review_process.data, form.review_process_url.data)

    bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
    bibjson.add_url(form.instructions_authors_url.data, urltype='author_instructions')

    if (form.plagiarism_screening.data and form.plagiarism_screening.data != 'None') or form.plagiarism_screening_url.data:
        bibjson.set_plagiarism_detection(
            form.plagiarism_screening_url.data,
            has_detection=forms.interpret_special(form.plagiarism_screening.data)
        )

    if form.publication_time.data:
        bibjson.publication_time = form.publication_time.data

    bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

    license_type = forms.interpret_other(form.license.data, form.license_other.data)
    if forms.interpret_special(license_type):
        # "None" and "False" as strings (like they come out of the WTForms processing)
        # would get interpreted correctly by this check, so "None" licenses should not appear
        if license_type in licenses:
            by = licenses[license_type]['BY']
            nc = licenses[license_type]['NC']
            nd = licenses[license_type]['ND']
            sa = licenses[license_type]['SA']
            license_title = licenses[license_type]['title']
        elif form.license_checkbox.data:
            by = True if 'BY' in form.license_checkbox.data else False
            nc = True if 'NC' in form.license_checkbox.data else False
            nd = True if 'ND' in form.license_checkbox.data else False
            sa = True if 'SA' in form.license_checkbox.data else False
            license_title = license_type
        else:
            by = nc = nd = sa = None
            license_title = license_type

        bibjson.set_license(
            license_title,
            license_type,
            url=form.license_url.data,
            open_access=forms.interpret_special(form.open_access.data),
            by=by, nc=nc, nd=nd, sa=sa,
            embedded=forms.interpret_special(form.license_embedded.data),
            embedded_example_url=form.license_embedded_url.data
        )

    # checkboxes
    deposit_policies = forms.interpret_special(form.deposit_policy.data)  # need empty list if it's just "None"
    deposit_policies = forms.interpret_other(deposit_policies, form.deposit_policy_other.data)
    if deposit_policies:
        bibjson.deposit_policy = deposit_policies

    if form.copyright.data and form.copyright.data != 'None':
        holds_copyright = forms.interpret_other(
            forms.interpret_special(form.copyright.data),
            form.copyright_other.data
        )
        bibjson.set_author_copyright(form.copyright_url.data, holds_copyright=holds_copyright)

    if form.publishing_rights.data and form.publishing_rights.data != 'None':
        publishing_rights = forms.interpret_other(
            forms.interpret_special(form.publishing_rights.data),
            form.publishing_rights_other.data
        )
        bibjson.set_author_publishing_rights(form.publishing_rights_url.data, holds_rights=publishing_rights)

    # need to copy over the notes from the existing journal object, if any, otherwise
    # the dates on all the notes will get reset to right now (i.e. last_updated),
    # since the journal object we're creating in this xwalk is a new, empty one
    journal.set_notes(existing_journal.notes())

    # generate an index of the current notes, just the text
    curnotes = []
    for curnote in journal.notes():
        curnotes.append(curnote['note'])

    # add any new notes
    formnotes = []
    for formnote in form.notes.data:
        if formnote['note']:
            if formnote['note'] not in curnotes and formnote["note"] != "":
                journal.add_note(formnote['note'])
            # also generate another text index of notes, this time an index of the form notes
            formnotes.append(formnote['note'])

    if current_user.has_role("delete_note"):
        # delete all notes not coming back from the form - it means they've been deleted;
        # also if one of the saved notes is completely blank, delete it
        for curnote in journal.notes()[:]:
            if not curnote['note'] or curnote['note'] not in formnotes:
                journal.remove_note(curnote)

    new_subjects = []
    for code in form.subject.data:
        sobj = {"scheme": 'LCC', "term": lcc.lookup_code(code), "code": code}
        new_subjects.append(sobj)
    bibjson.set_subjects(new_subjects)

    owner = form.owner.data.strip()
    if owner:
        journal.set_owner(owner)

    editor_group = form.editor_group.data.strip()
    if editor_group:
        journal.set_editor_group(editor_group)

    editor = form.editor.data.strip()
    if editor:
        journal.set_editor(editor)

    # old fields - only create them in the journal record if the values actually exist;
    # need to use interpret_special in the test condition in case 'None' comes back from the form
    if getattr(form, 'author_pays', None):
        if forms.interpret_special(form.author_pays.data):
            bibjson.author_pays = form.author_pays.data
    if getattr(form, 'author_pays_url', None):
        if forms.interpret_special(form.author_pays_url.data):
            bibjson.author_pays_url = form.author_pays_url.data
    if getattr(form, 'oa_end_year', None):
        if forms.interpret_special(form.oa_end_year.data):
            bibjson.set_oa_end(form.oa_end_year.data)

    return journal
def migrate_journals(source):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    journals = xml.getroot()
    print "migrating", str(len(journals)), "journal records"

    clusters = _get_journal_clusters(journals)

    # make a journal object, and map the main and historic records to it
    for canon, rest in clusters:
        j = Journal()

        cb = _to_journal_bibjson(canon)
        j.set_bibjson(cb)
        j.set_in_doaj(_is_in_doaj(canon))
        j.set_created(_created_date(canon))

        for p in rest:
            replaces = _get_replaces(p)
            isreplacedby = _get_isreplacedby(p)
            j.add_history(_to_journal_bibjson(p), replaces=replaces, isreplacedby=isreplacedby)

        j.save()
def test_01_discover_duplicates(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save()

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432",
                                                               doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save(blocking=True)
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234-5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a
        # default if we don't provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article)
    else:
        possible_articles = svc.discover_duplicates(article)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article,
            # otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article,
            # otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def add_journal_metadata(self, j=None, reg=None):
    """
    This function makes sure the article is populated with all the relevant info from its owning parent object
    :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
    """
    # record the data that is copied into the article into the "reg"ister, in case the
    # caller needs to know exactly and only which information was copied
    if reg is None:
        reg = Journal()
    rbj = reg.bibjson()

    if j is None:
        journal = self.get_journal()
    else:
        journal = j

    # we were unable to find a journal
    if journal is None:
        raise NoJournalException("Unable to find a journal associated with this article")

    # if we get to here, we have a journal record we want to pull data from
    jbib = journal.bibjson()
    bibjson = self.bibjson()

    # tripwire to be tripped if the journal makes changes to the article
    trip = False

    if bibjson.subjects() != jbib.subjects():
        trip = True
        bibjson.set_subjects(jbib.subjects())
    rbj.set_subjects(jbib.subjects())

    if jbib.title is not None:
        if bibjson.journal_title != jbib.title:
            trip = True
            bibjson.journal_title = jbib.title
        rbj.title = jbib.title

    if jbib.get_license() is not None:
        lic = jbib.get_license()
        alic = bibjson.get_journal_license()

        if lic is not None and (alic is None or (lic.get("title") != alic.get("title") or
                                                 lic.get("type") != alic.get("type") or
                                                 lic.get("url") != alic.get("url") or
                                                 lic.get("version") != alic.get("version") or
                                                 lic.get("open_access") != alic.get("open_access"))):
            bibjson.set_journal_license(lic.get("title"), lic.get("type"), lic.get("url"),
                                        lic.get("version"), lic.get("open_access"))
            trip = True
        rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"),
                        lic.get("version"), lic.get("open_access"))

    if len(jbib.language) > 0:
        jlang = jbib.language
        alang = bibjson.journal_language
        jlang.sort()
        alang.sort()
        if jlang != alang:
            bibjson.journal_language = jbib.language
            trip = True
        rbj.set_language(jbib.language)

    if jbib.country is not None:
        if jbib.country != bibjson.journal_country:
            bibjson.journal_country = jbib.country
            trip = True
        rbj.country = jbib.country

    if jbib.publisher:
        if jbib.publisher != bibjson.publisher:
            bibjson.publisher = jbib.publisher
            trip = True
        rbj.publisher = jbib.publisher

    # copy the seal info, in_doaj status and the journal's ISSNs
    if journal.is_in_doaj() != self.is_in_doaj():
        self.set_in_doaj(journal.is_in_doaj())
        trip = True
    reg.set_in_doaj(journal.is_in_doaj())

    if journal.has_seal() != self.has_seal():
        self.set_seal(journal.has_seal())
        trip = True
    reg.set_seal(journal.has_seal())

    try:
        aissns = bibjson.journal_issns
        jissns = jbib.issns()
        aissns.sort()
        jissns.sort()
        if aissns != jissns:
            bibjson.journal_issns = jbib.issns()
            trip = True

        eissns = jbib.get_identifiers(jbib.E_ISSN)
        pissns = jbib.get_identifiers(jbib.P_ISSN)
        if eissns is not None and len(eissns) > 0:
            rbj.add_identifier(rbj.E_ISSN, eissns[0])
        if pissns is not None and len(pissns) > 0:
            rbj.add_identifier(rbj.P_ISSN, pissns[0])
    except KeyError:
        # no issns, don't worry about it for now
        pass

    return trip
def test_01_accept_application(self, name, application_type, account_type, manual_update,
                               provenance, raises, result_provenance, result_manual_update):
    ###############################################
    ## set up

    # create the application
    application = None
    if application_type == "save_fail":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.save = mock_save
        Journal.save = mock_save
    elif application_type == "with_current_journal":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.remove_notes()
        application.add_note("unique 1", "2002-01-01T00:00:00Z")
        application.add_note("duplicate", "2001-01-01T00:00:00Z")

        cj = application.current_journal
        journal = Journal(**JournalFixtureFactory.make_journal_source())
        journal.set_id(cj)
        journal.remove_notes()
        journal.add_note("unique 2", "2003-01-01T00:00:00Z")
        journal.add_note("duplicate", "2001-01-01T00:00:00Z")
        journal.save(blocking=True)
    elif application_type == "no_current_journal":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.remove_current_journal()

    acc = None
    if account_type == "not_allowed":
        acc = Account(**AccountFixtureFactory.make_publisher_source())
    elif account_type == "allowed":
        acc = Account(**AccountFixtureFactory.make_managing_editor_source())

    mu = None
    if manual_update in ["true", "false"]:
        mu = manual_update == "true"

    prov = None
    if provenance in ["true", "false"]:
        prov = provenance == "true"

    save = bool(randint(0, 1))

    ###########################################################
    # Execution

    svc = DOAJ.applicationService()
    if raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.accept_application(application, acc, mu, prov)
    else:
        journal = svc.accept_application(application, acc, mu, prov,
                                         save_journal=save, save_application=save)

        # we need to sleep, so the index catches up
        time.sleep(1)

        # check a few common things
        assert application.application_status == constants.APPLICATION_STATUS_ACCEPTED
        assert application.current_journal is None
        assert journal.current_application is None
        assert application.related_journal == journal.id

        related = journal.related_applications
        if application_type == "with_current_journal":
            assert len(related) == 3
        elif application_type == "no_current_journal":
            assert len(related) == 1
        assert related[0].get("application_id") == application.id
        assert related[0].get("date_accepted") is not None

        if result_manual_update == "yes":
            assert journal.last_manual_update is not None
            assert journal.last_manual_update != "1970-01-01T00:00:00Z"
            assert application.last_manual_update is not None
            assert application.last_manual_update != "1970-01-01T00:00:00Z"
        elif result_manual_update == "no":
            assert journal.last_manual_update is None
            assert application.last_manual_update is None

        if application_type == "with_current_journal":
            assert len(journal.notes) == 3
            notevals = [note.get("note") for note in journal.notes]
            assert "duplicate" in notevals
            assert "unique 1" in notevals
            assert "unique 2" in notevals

        app_prov = Provenance.get_latest_by_resource_id(application.id)
        if result_provenance == "yes":
            assert app_prov is not None
        elif result_provenance == "no":
            assert app_prov is None

        if save:
            pass
# first, get each application and consider it
counter = 0
for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
    counter += 1
    application = Suggestion(**result)
    application.remove_related_journal()

    # find all the journals that this application could be associated with (which we need to do by issn)
    issns = application.bibjson().issns()

    # query by each issn individually, because we're looking for the widest possible map.
    # Querying by both would require both issns to match
    related_journals = []
    related_journal_ids = []
    for issn in issns:
        journals = Journal.find_by_issn(issn)
        for journal in journals:
            if journal.id not in related_journal_ids:
                related_journal_ids.append(journal.id)
                related_journals.append(journal)

    if len(related_journals) > 0:
        # sort the journals by their created date
        related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)

        # we set an application as having a related journal in the following conditions:
        # 1. The application was created before the journal and last updated near or after
        #    the journal created date, and this journal is the nearest one in time
        # 2. The last_reapplication date is after the application created date, and is the nearest one
        app_created = application.created_timestamp
def test_01_delete_application(self, name, application_type, account_type, current_journal, related_journal, raises):
    ###############################################
    ## set up

    # create the test application (if needed), and the associated current_journal
    # and related_journal in suitable states
    application = None
    cj = None
    rj = None
    if application_type == "found" or application_type == "locked":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())

        if current_journal == "none":
            application.remove_current_journal()
        elif current_journal == "not_found":
            application.set_current_journal("123456789987654321")
        elif current_journal == "found":
            cj = Journal(**JournalFixtureFactory.make_journal_source())
            cj.set_id(cj.makeid())
            cj.save(blocking=True)
            application.set_current_journal(cj.id)
        elif current_journal == "locked":
            cj = Journal(**JournalFixtureFactory.make_journal_source())
            cj.set_id(cj.makeid())
            cj.save(blocking=True)
            application.set_current_journal(cj.id)
            lock.lock(constants.LOCK_JOURNAL, cj.id, "otheruser")

        if related_journal == "none":
            application.remove_related_journal()
        elif related_journal == "not_found":
            application.set_related_journal("123456789987654321")
        elif related_journal == "found":
            rj = Journal(**JournalFixtureFactory.make_journal_source())
            rj.set_id(rj.makeid())
            rj.save(blocking=True)
            application.set_related_journal(rj.id)
        elif related_journal == "locked":
            rj = Journal(**JournalFixtureFactory.make_journal_source())
            rj.set_id(rj.makeid())
            rj.save(blocking=True)
            application.set_related_journal(rj.id)
            lock.lock(constants.LOCK_JOURNAL, rj.id, "otheruser")

    acc = None
    if account_type != "none":
        acc = Account(**AccountFixtureFactory.make_publisher_source())
        if account_type == "not_permitted":
            acc.remove_role("publisher")

    if application_type == "locked":
        thelock = lock.lock(constants.LOCK_APPLICATION, application.id, "otheruser")
        # we can't explicitly block on the lock, but we can halt until we confirm it is saved
        thelock.blockall([(thelock.id, thelock.last_updated)])

    application_id = None
    if application is not None:
        if acc is not None:
            application.set_owner(acc.id)
        application.save(blocking=True)
        application_id = application.id
    elif application_type == "not_found":
        application_id = u"sdjfasofwefkwflkajdfasjd"

    ###########################################################
    # Execution

    svc = DOAJ.applicationService()
    if raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.delete_application(application_id, acc)
        time.sleep(1)
        check_locks(application, cj, rj, acc)
    else:
        svc.delete_application(application_id, acc)

        # we need to sleep, so the index catches up
        time.sleep(1)

        # check that no locks remain set for this user
        check_locks(application, cj, rj, acc)

        # check that the application actually is gone
        if application is not None:
            assert Suggestion.pull(application.id) is None

        # check that the current journal no longer has a reference to the application
        if cj is not None:
            cj = Journal.pull(cj.id)
            assert cj.current_application is None

        # check that the related journal has a record that the application was deleted
        if rj is not None:
            rj = Journal.pull(rj.id)
            record = rj.related_application_record(application.id)
            assert "status" in record
            assert record["status"] == "deleted"
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    # set up for the test
    #########################################

    cj = None
    has_seal = bool(randint(0, 1))
    application = None
    if application_type == "present":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")
            application.set_seal(has_seal)
            application.add_note("Application Note")

        if current_journal == "present":
            journal = Journal(**JournalFixtureFactory.make_journal_source())
            journal.remove_contacts()
            journal.add_contact("Journal", "*****@*****.**")
            journal.set_editor_group("journaleditorgroup")
            journal.set_editor("journaleditor")
            journal.set_owner("journalowner")
            journal.remove_current_application()
            journal.remove_notes()
            journal.add_note("Journal Note")
            journal.save(blocking=True)
            application.set_current_journal(journal.id)
            cj = journal
        elif current_journal == "missing":
            application.set_current_journal("123456789987654321")

    manual_update = None
    if manual_update_arg == "true":
        manual_update = True
    elif manual_update_arg == "false":
        manual_update = False

    # execute the test
    ########################################

    svc = DOAJ.applicationService()
    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
    else:
        journal = svc.application_2_journal(application, manual_update)

        # check the result
        ######################################

        assert journal is not None
        assert isinstance(journal, Journal)
        assert journal.is_in_doaj() is True

        jbj = journal.bibjson().data
        del jbj["active"]
        assert jbj == application.bibjson().data

        if current_journal == "present":
            assert len(journal.related_applications) == 3
        else:
            assert len(journal.related_applications) == 1
        related = journal.related_application_record(application.id)
        assert related is not None

        if manual_update_arg == "true":
            assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

        if app_key_properties == "yes":
            contacts = journal.contacts()
            assert len(contacts) == 1
            assert contacts[0].get("name") == "Application"
            assert contacts[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "appeditorgroup"
            assert journal.editor == "appeditor"
            assert journal.owner == "appowner"
            assert journal.has_seal() == has_seal

            if current_journal == "present":
                assert len(journal.notes) == 2
            else:
                assert len(journal.notes) == 1

        elif app_key_properties == "no":
            if current_journal == "present":
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Journal"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "journaleditorgroup"
                assert journal.editor == "journaleditor"
                assert journal.owner == "journalowner"
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 2
            elif current_journal == "none" or current_journal == "missing":
                contacts = journal.contacts()
                assert len(contacts) == 0
                assert journal.editor_group is None
                assert journal.editor is None
                assert journal.owner is None
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 1

        if current_journal == "present":
            assert cj.id == journal.id
            assert cj.created_date == journal.created_date
from portality.models import Journal
import sys

# FIXME: in an ideal world, the functional tests would also be wrapped by doaj.helpers.DoajTestCase
from doajtest.bootstrap import prepare_for_test
prepare_for_test()

journals = [
    {'title': 'Title 1', 'publisher': 'Publisher 1',
     'identifier': [{'type': 'pissn', 'id': '1234-5678'}],
     'active': True},
    {'title': 'Title 2', 'publisher': 'Publisher 2',
     'identifier': [{'type': 'pissn', 'id': '0123-4567'}, {'type': 'eissn', 'id': '7654-3210'}],
     'license': [{'type': 'cc-by', 'open_access': True, 'url': 'http://opendefinition.org/licenses/cc-by/'}],
     'active': True},
    {'title': 'Title 3', 'publisher': 'Publisher 3', 'author_pays': False, 'active': False},
    {'title': 'Title 4', 'publisher': 'Publisher 3', 'language': 'en', 'active': True},
    {'title': 'The "Quoted" Journal of Testing CSV Quotes', 'publisher': 'Publisher 2',
     'identifier': [{'type': 'pissn', 'id': '2345-6789'}],
     'language': 'bg', 'author_pays': True, 'active': True},
]

for j in journals:
    jm = Journal(**{'bibjson': j})
    jm.save()

print 'Sent {0} Journal documents to the index'.format(len(journals))
print 'Refreshing the index'
Journal.refresh()
# first, get each application and consider it
counter = 0
for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
    counter += 1
    application = Suggestion(**result)
    application.remove_related_journal()

    # find all the journals that this application could be associated with (which we need to do by issn)
    issns = application.bibjson().issns()

    # query by each issn individually, because we're looking for the widest possible map. Querying by
    # both would require both issns to match
    related_journals = []
    related_journal_ids = []
    for issn in issns:
        journals = Journal.find_by_issn(issn)
        for journal in journals:
            if journal.id not in related_journal_ids:
                related_journal_ids.append(journal.id)
                related_journals.append(journal)

    if len(related_journals) > 0:
        # sort the journals by their created date
        related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)

        # we set an application as having a related journal in the following conditions:
        # 1. The application was created before the journal and last updated near or after the journal created date,
        #    and this journal is the nearest one in time
        # 2. The last_reapplication date is after the application created date, and is the nearest one
        app_created = application.created_timestamp
        for journal in related_journals:
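            # The loop body is truncated in the source. A purely hypothetical sketch of
            # condition 1 above (application created before the journal, and last updated
            # near or after the journal's created date) might read as follows; the fuzz
            # window and set_related_journal are assumptions, not the script's own code:
            #
            #     if app_created < journal.created_timestamp:
            #         gap = (journal.created_timestamp - application.last_updated_timestamp).total_seconds()
            #         if gap <= NEAR_FUZZ_SECONDS:
            #             application.set_related_journal(journal.id)
            #             break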
def test_01_create_article(self, name, kwargs):
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up
    success = int(success_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"

    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1",
                                                           fulltext="http://example.com/1")
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)
        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1",
                                                      fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution
    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                    limit_to_account, add_journal_info, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                         limit_to_account, add_journal_info, dry_run)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"
def add_journal_metadata(self, j=None, reg=None):
    """
    This function makes sure the article is populated with all the relevant info from its owning parent object

    :param j: Pass in a Journal to bypass the (slow) locating step. MAKE SURE IT'S THE RIGHT ONE!
    """
    # Record the data that is copied into the article into the "reg"ister, in case the
    # caller needs to know exactly and only which information was copied
    if reg is None:
        reg = Journal()
    rbj = reg.bibjson()

    if j is None:
        journal = self.get_journal()
    else:
        journal = j

    # we were unable to find a journal
    if journal is None:
        raise NoJournalException("Unable to find a journal associated with this article")

    # if we get to here, we have a journal record we want to pull data from
    jbib = journal.bibjson()
    bibjson = self.bibjson()

    # tripwire to be tripped if the journal makes changes to the article
    trip = False

    if bibjson.subjects() != jbib.subjects():
        trip = True
    bibjson.set_subjects(jbib.subjects())
    rbj.set_subjects(jbib.subjects())

    if jbib.title is not None:
        if bibjson.journal_title != jbib.title:
            trip = True
        bibjson.journal_title = jbib.title
        rbj.title = jbib.title

    if jbib.get_license() is not None:
        lic = jbib.get_license()
        alic = bibjson.get_journal_license()

        if lic is not None and (alic is None or (lic.get("title") != alic.get("title")
                                                 or lic.get("type") != alic.get("type")
                                                 or lic.get("url") != alic.get("url")
                                                 or lic.get("version") != alic.get("version")
                                                 or lic.get("open_access") != alic.get("open_access"))):
            bibjson.set_journal_license(lic.get("title"), lic.get("type"), lic.get("url"),
                                        lic.get("version"), lic.get("open_access"))
            trip = True
        rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"),
                        lic.get("version"), lic.get("open_access"))

    if len(jbib.language) > 0:
        jlang = jbib.language
        alang = bibjson.journal_language
        jlang.sort()
        alang.sort()
        if jlang != alang:
            bibjson.journal_language = jbib.language
            trip = True
        rbj.set_language(jbib.language)

    if jbib.country is not None:
        if jbib.country != bibjson.journal_country:
            bibjson.journal_country = jbib.country
            trip = True
        rbj.country = jbib.country

    if jbib.publisher:
        if jbib.publisher != bibjson.publisher:
            bibjson.publisher = jbib.publisher
            trip = True
        rbj.publisher = jbib.publisher

    # Copy the seal info, in_doaj status and the journal's ISSNs
    if journal.is_in_doaj() != self.is_in_doaj():
        self.set_in_doaj(journal.is_in_doaj())
        trip = True
    reg.set_in_doaj(journal.is_in_doaj())

    if journal.has_seal() != self.has_seal():
        self.set_seal(journal.has_seal())
        trip = True
    reg.set_seal(journal.has_seal())

    try:
        aissns = bibjson.journal_issns
        jissns = jbib.issns()
        aissns.sort()
        jissns.sort()
        if aissns != jissns:
            bibjson.journal_issns = jbib.issns()
            trip = True

        eissns = jbib.get_identifiers(jbib.E_ISSN)
        pissns = jbib.get_identifiers(jbib.P_ISSN)
        if eissns is not None and len(eissns) > 0:
            rbj.add_identifier(rbj.E_ISSN, eissns[0])
        if pissns is not None and len(pissns) > 0:
            rbj.add_identifier(rbj.P_ISSN, pissns[0])
    except KeyError:
        # No issns, don't worry about it for now
        pass

    return trip
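# Usage sketch for the register/tripwire pattern above: pass a scratch Journal in as the
# register to learn exactly which fields were copied, and only re-save when the journal
# actually changed the article. (`article` and `journal` are assumed to be in scope.)
reg = Journal()
tripped = article.add_journal_metadata(j=journal, reg=reg)
if tripped:
    article.save()
    print 'journal metadata changed the article; copied data:', reg.bibjson().data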
def test_01_reject_application(self, name, application, application_status, account, prov, current_journal, note, save, raises=None):
    #######################################
    ## set up
    if save == "fail":
        Suggestion.save = mock_save_fail

    ap = None
    journal = None
    if application == "exists":
        ap = Suggestion(**ApplicationFixtureFactory.make_application_source())
        ap.set_application_status(application_status)
        ap.set_id(ap.makeid())
        ap.remove_notes()

        if current_journal == "yes":
            journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
            journal.set_id(journal.makeid())
            journal.set_current_application(ap.id)
            journal.save(blocking=True)
            ap.set_current_journal(journal.id)
        else:
            ap.remove_current_journal()

    acc = None
    if account == "publisher":
        acc = Account(**AccountFixtureFactory.make_publisher_source())
    elif account == "admin":
        acc = Account(**AccountFixtureFactory.make_managing_editor_source())

    provenance = None
    if prov != "none":
        provenance = prov == "true"

    thenote = None
    if note == "yes":
        thenote = "abcdefg"

    ########################################
    ## execute
    svc = DOAJ.applicationService()
    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.reject_application(ap, acc, provenance, note=thenote)
    else:
        svc.reject_application(ap, acc, provenance, note=thenote)
        time.sleep(1)

        #######################################
        ## Check
        ap2 = Suggestion.pull(ap.id)
        assert ap2 is not None
        assert ap2.application_status == constants.APPLICATION_STATUS_REJECTED
        assert ap2.current_journal is None

        # check the updated and manually updated date are essentially the same (they can theoretically differ
        # by a small amount just based on when they are set)
        updated_spread = abs((ap2.last_updated_timestamp - ap2.last_manual_update_timestamp).total_seconds())
        assert updated_spread <= 1.0

        if current_journal == "yes" and journal is not None:
            j2 = Journal.pull(journal.id)
            assert j2 is not None
            assert j2.current_application is None
            assert ap2.related_journal == j2.id

        if prov == "true":
            pr = Provenance.get_latest_by_resource_id(ap.id)
            assert pr is not None

        if note == "yes":
            assert len(ap2.notes) == 1
            assert ap2.notes[0].get("note") == "abcdefg"
        elif note == "no":
            assert len(ap2.notes) == 0
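# The save == "fail" branch above monkeypatches Suggestion.save. A hypothetical shape for
# that helper (the real fixture may differ) -- it just needs to blow up like a failed write:
def mock_save_fail(self, *args, **kwargs):
    # simulate the storage layer refusing the write
    raise RuntimeError("save failed")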
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:

        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID",
                                   "Application Last Updated", "Application Status", "Published Diff",
                                   "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID",
                             "Application Created", "Application Last Updated", "Application Last Manual Update",
                             "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp  # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request,
                                           latest.id, latest.last_updated, latest.application_status,
                                           diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold,
            # and is it in a state other than rejected?
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request,
                                     latest.id, latest.created_date, latest.last_updated,
                                     latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request,
                                       str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request,
                                           str(journal.is_in_doaj()), owner])

        print "processed", counter, "journals"
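# journals_applications_provenance mutates PROV_QUERY in place, which implies a template
# of roughly this shape. The must-clause structure is dictated by the access paths above;
# the sort and size clauses are assumptions (the code treats provs[0] as the most recent):
PROV_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"resource_id.exact": None}},  # filled in with the application id
                {"term": {"action.exact": None}}        # filled in with "edit" or "status:accepted"
            ]
        }
    },
    "sort": [{"last_updated": {"order": "desc"}}],
    "size": 1
}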
def test_01_discover_duplicates(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up
    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save(blocking=True)

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432",
                                                               doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save()
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234-5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    Article.blockall(aids_block)

    ###########################################################
    # Execution
    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article, owner_id)
    else:
        possible_articles = svc.discover_duplicates(article, owner_id)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
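# discover_duplicates treats padded and prefixed DOIs as equivalent, per the variants the
# test generates. A hypothetical normaliser consistent with those variants (not the
# service's actual code):
def normalise_doi(doi):
    # strip whitespace padding, then any of the prefixes the test exercises
    doi = doi.strip()
    for prefix in ("doi:", "https://dx.doi.org/"):
        if doi.startswith(prefix):
            doi = doi[len(prefix):]
            break
    return doi

assert normalise_doi(" doi:10.1234/abc/11 ") == "10.1234/abc/11"
assert normalise_doi("https://dx.doi.org/10.1234/abc/11") == "10.1234/abc/11"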
def test_01_update_request(self, name, journal_id, journal_lock, account, account_role, account_is_owner,
                           current_applications, application_lock, application_status, completed_applications,
                           raises, return_app, return_jlock, return_alock, db_jlock, db_alock, db_app):
    ###############################################
    ## set up

    # create the journal
    journal = None
    jid = None
    if journal_id == "valid":
        journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.remove_related_applications()
        journal.remove_current_application()
        jid = journal.id
    elif journal_id == "not_in_doaj":
        journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=False))
        journal.remove_related_applications()
        journal.remove_current_application()
        jid = journal.id
    elif journal_id == "missing":
        jid = uuid.uuid4().hex

    acc = None
    if account == "yes":
        acc = Account(**AccountFixtureFactory.make_publisher_source())
        if account_role == "none":
            acc.remove_role("publisher")
        elif account_role == "admin":
            acc.remove_role("publisher")
            acc.add_role("admin")
        acc.set_id(acc.makeid())
        if account_is_owner == "yes":
            acc.set_id(journal.owner)

    if journal_lock == "yes":
        lock.lock("journal", jid, "someoneelse", blocking=True)

    latest_app = None
    current_app_count = int(current_applications)
    for i in range(current_app_count):
        app = Suggestion(**ApplicationFixtureFactory.make_application_source())
        app.set_id(app.makeid())
        app.set_created("198" + str(i) + "-01-01T00:00:00Z")
        app.set_current_journal(jid)
        app.save()
        latest_app = app
        if journal is not None:
            journal.set_current_application(app.id)

    comp_app_count = int(completed_applications)
    for i in range(comp_app_count):
        app = Suggestion(**ApplicationFixtureFactory.make_application_source())
        app.set_id(app.makeid())
        app.set_created("197" + str(i) + "-01-01T00:00:00Z")
        app.set_related_journal(jid)
        app.save()
        if journal is not None:
            journal.add_related_application(app.id, date_accepted=app.created_date)

    if current_app_count == 0 and comp_app_count == 0:
        # save at least one record to initialise the index mapping, otherwise tests fail
        app = Suggestion(**ApplicationFixtureFactory.make_application_source())
        app.set_id(app.makeid())
        app.save()

    if application_lock == "yes":
        lock.lock("suggestion", latest_app.id, "someoneelse", blocking=True)

    if application_status != "n/a":
        latest_app.set_application_status(application_status)
        latest_app.save(blocking=True)

    # finally save the journal record, ensuring we get a blocking save, so everything
    # above here should be synchronised with the repo
    if journal is not None:
        journal.save(blocking=True)

    ###########################################################
    # Execution
    svc = DOAJ.applicationService()
    if raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.update_request_for_journal(jid, acc)
    else:
        application, jlock, alock = svc.update_request_for_journal(jid, acc)

        # we need to sleep, so the index catches up
        time.sleep(1)

        if return_app == "none":
            assert application is None
        elif return_app == "yes":
            assert application is not None

        if return_jlock == "none":
            assert jlock is None
        elif return_jlock == "yes":
            assert jlock is not None

        if return_alock == "none":
            assert alock is None
        elif return_alock == "yes":
            assert alock is not None

        if db_jlock == "no" and acc is not None:
            assert not lock.has_lock("journal", jid, acc.id)
        elif db_jlock == "yes" and acc is not None:
            assert lock.has_lock("journal", jid, acc.id)

        if db_alock == "no" and application.id is not None and acc is not None:
            assert not lock.has_lock("suggestion", application.id, acc.id)
        elif db_alock == "yes" and application.id is not None and acc is not None:
            assert lock.has_lock("suggestion", application.id, acc.id)

        if db_app == "no" and application.id is not None:
            indb = Suggestion.q2obj(q="id.exact:" + application.id)
            assert indb is None
        elif db_app == "yes" and application.id is not None:
            indb = Suggestion.q2obj(q="id.exact:" + application.id)
            assert indb is not None

        if current_app_count == 0 and comp_app_count == 0 and application is not None:
            assert application.article_metadata is None
            assert application.articles_last_year is None
        elif application is not None:
            assert application.article_metadata is not None
            assert application.articles_last_year is not None
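# Caller-side sketch for update_request_for_journal: the service returns the update
# request plus the journal and application locks it took, and the caller must dispose of
# them when done. `delete()` on the lock objects is an assumption for illustration;
# `svc`, `jid` and `acc` are assumed to be in scope as in the test above.
application, jlock, alock = svc.update_request_for_journal(jid, acc)
try:
    pass  # ... edit and save the application here ...
finally:
    if jlock is not None:
        jlock.delete()
    if alock is not None:
        alock.delete()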