def get_journal(cls, specs):
    """Build a mock replacement for an article's ``get_journal`` method.

    Each spec dict may carry "title", "pissn" and "eissn"; a Journal fixture
    is constructed for it and stored back on the spec under "instance".
    The returned ``mock(self)`` looks up the calling article's ISSNs and
    hands back the matching Journal instance (or None if nothing matches).
    """
    catalogue = []
    for record in specs:
        journal = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        jbib = journal.bibjson()
        jbib.title = record.get("title", "Journal Title")
        # wipe the fixture's identifiers so only the spec's ISSNs remain
        jbib.remove_identifiers()
        if "pissn" in record:
            jbib.add_identifier(jbib.P_ISSN, record.get("pissn"))
        if "eissn" in record:
            jbib.add_identifier(jbib.E_ISSN, record.get("eissn"))
        record["instance"] = journal
        catalogue.append(record)

    def mock(self):
        bib = self.bibjson()
        # the ISSNs on the calling article decide which journal it belongs to
        article_pissns = bib.get_identifiers(bib.P_ISSN)
        article_eissns = bib.get_identifiers(bib.E_ISSN)
        for entry in catalogue:
            # NOTE: specs are expected to carry both "pissn" and "eissn" keys
            if entry["pissn"] in article_pissns and entry["eissn"] in article_eissns:
                return entry["instance"]

    return mock
def test_has_permissions(self):
    """Check article-creation permissions for stranger/owner/admin accounts.

    Builds one journal owned by ``publisher_owner``, then an article whose
    ISSNs match that journal, and asserts:
      * any account may proceed when limit_to_account is False,
      * the owning publisher and a managing editor may proceed when it is True,
      * a stranger gets a failure report listing the unowned ISSNs.
    """
    journal_source = JournalFixtureFactory.make_journal_source()
    journal1 = Journal(**journal_source)

    publisher_owner_src = AccountFixtureFactory.make_publisher_source()
    publisher_owner = Account(**publisher_owner_src)
    # a second, unrelated publisher who does not own the journal
    publisher_stranged_src = AccountFixtureFactory.make_publisher_source()
    publisher_stranged = Account(**publisher_stranged_src)
    admin_src = AccountFixtureFactory.make_managing_editor_source()
    admin = Account(**admin_src)

    journal1.set_owner(publisher_owner)
    # blocking save so the journal is queryable before permissions are checked
    journal1.save(blocking=True)

    eissn = journal1.bibjson().get_one_identifier("eissn")
    pissn = journal1.bibjson().get_one_identifier("pissn")
    # article carries the same ISSNs as the owned journal
    art_source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn)
    article = Article(**art_source)

    # third argument is the limit_to_account flag: when False, anyone passes
    assert self.svc.has_permissions(publisher_stranged, article, False)
    assert self.svc.has_permissions(publisher_owner, article, True)
    assert self.svc.has_permissions(admin, article, True)
    failed_result = self.svc.has_permissions(publisher_stranged, article, True)
    # the stranger's failure report must flag both ISSNs as unowned
    assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0, 'shared': [], 'unowned': [pissn, eissn], 'unmatched': []}, "received: {}".format(failed_result)
def find_by_issn(cls, issns, owners):
    """Build a mock replacement for ``Journal.find_by_issn``.

    For every owner and every (eissn, pissn) pair a Journal fixture is
    created with exactly those identifiers, and the ISSNs are recorded.
    The returned classmethod-style ``mock`` hands back the whole journal
    list when any requested ISSN was registered, else an empty list.

    NOTE(review): reconstructed from a whitespace-mangled source — the
    journal construction is assumed to sit inside the inner (eissn, pissn)
    loop; confirm against the original fixture factory.
    """
    journals = []
    seen_issns = []
    for owner in owners:
        for eissn, pissn in issns:
            # track every distinct, non-None ISSN we have built a journal for
            if eissn not in seen_issns and eissn is not None:
                seen_issns.append(eissn)
            if pissn not in seen_issns and pissn is not None:
                seen_issns.append(pissn)

            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner)
            # strip fixture ISSNs, then install only the requested ones
            journal.bibjson().remove_identifiers("eissn")
            journal.bibjson().remove_identifiers("pissn")
            if eissn is not None:
                journal.bibjson().add_identifier("eissn", eissn)
            if pissn is not None:
                journal.bibjson().add_identifier("pissn", pissn)
            journals.append(journal)

    @classmethod
    def mock(cls, issns, in_doaj=None, max=10):
        # mirror the real API: accept a single ISSN or a list of them
        # (in_doaj and max are accepted but ignored by this mock)
        if not isinstance(issns, list):
            issns = [issns]
        for issn in issns:
            if issn in seen_issns:
                # any hit returns the full prepared journal list
                return journals
        return []

    return mock
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    """Scroll every journal and report anomalies to three CSV files (Python 2 script).

    Writes:
      * outfile_applications - journals created (or reapplied) more than
        THRESHOLD seconds AFTER their latest application was updated, with
        any recorded "edit" / "status:accepted" provenance timestamps.
      * outfile_reapps - in-DOAJ journals whose latest (non-rejected)
        application was manually updated more than THRESHOLD seconds AFTER
        the journal record, i.e. suspicious reapplications.
      * outfile_accounts - journals whose owner account is absent or unset.

    :param outfile_applications: path for the applications report CSV
    :param outfile_accounts: path for the missing-accounts report CSV
    :param outfile_reapps: path for the reapplications report CSV
    :param conn: esprit connection to scroll the "journal" index over
    """
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:

        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            # pick the application with the newest last_updated timestamp
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                # no related application at all - nothing to compare against
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                # a reapplication supersedes the original creation date
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                # look for the most recent "edit" provenance on this application
                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                # and the most recent acceptance provenance
                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), owner])

    print "processed", counter, "journals"
def add_journal_metadata(self, j=None, reg=None):
    """Populate this article with metadata copied from its owning journal.

    Copies subjects, journal title, licence, language, country, publisher,
    in_doaj status, seal and ISSNs from the journal onto the article.

    :param j: Pass in a Journal to bypass the (slow) locating step.
        MAKE SURE IT'S THE RIGHT ONE!
    :param reg: optional Journal used as a "register": every value copied
        onto the article is also recorded here so the caller knows exactly
        and only which information was copied.
    :return: True if any article field was actually changed ("tripwire"),
        False if the article already agreed with the journal.
    :raises NoJournalException: when no owning journal can be located.
    """
    # Record the data that is copied into the article into the "reg"ister, in case the
    # caller needs to know exactly and only which information was copied
    if reg is None:
        reg = Journal()
    rbj = reg.bibjson()

    if j is None:
        journal = self.get_journal()
    else:
        journal = j

    # we were unable to find a journal
    if journal is None:
        raise NoJournalException("Unable to find a journal associated with this article")

    # if we get to here, we have a journal record we want to pull data from
    jbib = journal.bibjson()
    bibjson = self.bibjson()

    # tripwire to be tripped if the journal makes changes to the article
    trip = False

    if bibjson.subjects() != jbib.subjects():
        trip = True
        bibjson.set_subjects(jbib.subjects())
    # the register always records the journal's subjects, changed or not
    rbj.set_subjects(jbib.subjects())

    if jbib.title is not None:
        if bibjson.journal_title != jbib.title:
            trip = True
            bibjson.journal_title = jbib.title
        rbj.title = jbib.title

    if jbib.get_license() is not None:
        lic = jbib.get_license()
        alic = bibjson.get_journal_license()

        # only rewrite the article licence when any of its fields differ
        if lic is not None and (alic is None or (lic.get("title") != alic.get("title") or lic.get("type") != alic.get("type") or lic.get("url") != alic.get("url") or lic.get("version") != alic.get("version") or lic.get("open_access") != alic.get("open_access"))):
            bibjson.set_journal_license(lic.get("title"), lic.get("type"), lic.get("url"), lic.get("version"), lic.get("open_access"))
            trip = True
        rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"), lic.get("version"), lic.get("open_access"))

    if len(jbib.language) > 0:
        jlang = jbib.language
        alang = bibjson.journal_language
        # compare order-insensitively (NB: sorts the underlying lists in place)
        jlang.sort()
        alang.sort()
        if jlang != alang:
            bibjson.journal_language = jbib.language
            trip = True
        rbj.set_language(jbib.language)

    if jbib.country is not None:
        if jbib.country != bibjson.journal_country:
            bibjson.journal_country = jbib.country
            trip = True
        rbj.country = jbib.country

    if jbib.publisher:
        if jbib.publisher != bibjson.publisher:
            bibjson.publisher = jbib.publisher
            trip = True
        rbj.publisher = jbib.publisher

    # Copy the seal info, in_doaj status and the journal's ISSNs
    if journal.is_in_doaj() != self.is_in_doaj():
        self.set_in_doaj(journal.is_in_doaj())
        trip = True
    reg.set_in_doaj(journal.is_in_doaj())

    if journal.has_seal() != self.has_seal():
        self.set_seal(journal.has_seal())
        trip = True
    reg.set_seal(journal.has_seal())

    try:
        aissns = bibjson.journal_issns
        jissns = jbib.issns()
        aissns.sort()
        jissns.sort()
        if aissns != jissns:
            bibjson.journal_issns = jbib.issns()
            trip = True

        # the register keeps at most one e-ISSN and one p-ISSN
        eissns = jbib.get_identifiers(jbib.E_ISSN)
        pissns = jbib.get_identifiers(jbib.P_ISSN)
        if eissns is not None and len(eissns) > 0:
            rbj.add_identifier(rbj.E_ISSN, eissns[0])
        if pissns is not None and len(pissns) > 0:
            rbj.add_identifier(rbj.P_ISSN, pissns[0])
    except KeyError:
        # No issns, don't worry about it for now
        pass

    return trip
def form2obj(form, existing_journal):
    """Crosswalk an admin journal form into a new Journal object.

    :param form: the submitted WTForms journal form
    :param existing_journal: the journal being edited; its notes are carried
        over so their dates are not reset to "now"
    :return: a freshly constructed Journal populated from the form
    """
    journal = Journal()
    bibjson = journal.bibjson()

    # The if statements that wrap practically every field are there due to this
    # form being used to edit old journals which don't necessarily have most of
    # this info.
    # It also allows admins to delete the contents of any field if they wish,
    # by ticking the "Allow incomplete form" checkbox and deleting the contents
    # of that field. The if condition(s) will then *not* add the relevant field to the
    # new journal object being constructed.
    # add_url in the journal model has a safeguard against empty URL-s.

    if form.title.data:
        bibjson.title = form.title.data
    bibjson.add_url(form.url.data, urltype='homepage')
    if form.alternative_title.data:
        bibjson.alternative_title = form.alternative_title.data
    if form.pissn.data:
        bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
    if form.eissn.data:
        bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
    if form.publisher.data:
        bibjson.publisher = form.publisher.data
    if form.society_institution.data:
        bibjson.institution = form.society_institution.data
    if form.platform.data:
        bibjson.provider = form.platform.data
    if form.contact_name.data or form.contact_email.data:
        journal.add_contact(form.contact_name.data, form.contact_email.data)
    if form.country.data:
        bibjson.country = form.country.data

    # interpret_special maps the form's "True"/"False"/"None" strings to values
    if forms.interpret_special(form.processing_charges.data):
        bibjson.set_apc(form.processing_charges_currency.data, form.processing_charges_amount.data)

    if forms.interpret_special(form.submission_charges.data):
        bibjson.set_submission_charges(form.submission_charges_currency.data, form.submission_charges_amount.data)

    if forms.interpret_special(form.waiver_policy.data):
        bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

    # checkboxes
    if forms.interpret_special(form.digital_archiving_policy.data) or form.digital_archiving_policy_url.data:
        archiving_policies = forms.interpret_special(form.digital_archiving_policy.data)
        # fold in the free-text "other" and "library" options, keeping their labels
        archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_other.data, store_other_label=True)
        archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_library.data, forms.digital_archiving_policy_specific_library_value, store_other_label=True)
        bibjson.set_archiving_policy(archiving_policies, form.digital_archiving_policy_url.data)

    if form.crawl_permission.data and form.crawl_permission.data != 'None':
        bibjson.allows_fulltext_indexing = forms.interpret_special(form.crawl_permission.data)  # just binary

    # checkboxes
    article_ids = forms.interpret_special(form.article_identifiers.data)
    article_ids = forms.interpret_other(article_ids, form.article_identifiers_other.data)
    if article_ids:
        bibjson.persistent_identifier_scheme = article_ids

    if (form.download_statistics.data and form.download_statistics.data != 'None') or form.download_statistics_url.data:
        bibjson.set_article_statistics(form.download_statistics_url.data, forms.interpret_special(form.download_statistics.data))

    if form.first_fulltext_oa_year.data:
        bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

    # checkboxes
    fulltext_format = forms.interpret_other(form.fulltext_format.data, form.fulltext_format_other.data)
    if fulltext_format:
        bibjson.format = fulltext_format

    if form.keywords.data:
        bibjson.set_keywords(form.keywords.data)  # tag list field

    if form.languages.data:
        bibjson.set_language(form.languages.data)  # select multiple field - gives a list back

    bibjson.add_url(form.editorial_board_url.data, urltype='editorial_board')

    if form.review_process.data or form.review_process_url.data:
        bibjson.set_editorial_review(form.review_process.data, form.review_process_url.data)

    bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
    bibjson.add_url(form.instructions_authors_url.data, urltype='author_instructions')

    if (form.plagiarism_screening.data and form.plagiarism_screening.data != 'None') or form.plagiarism_screening_url.data:
        bibjson.set_plagiarism_detection(
            form.plagiarism_screening_url.data,
            has_detection=forms.interpret_special(form.plagiarism_screening.data)
        )

    if form.publication_time.data:
        bibjson.publication_time = form.publication_time.data

    bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

    license_type = forms.interpret_other(form.license.data, form.license_other.data)
    if forms.interpret_special(license_type):
        # "None" and "False" as strings like they come out of the WTForms processing)
        # would get interpreted correctly by this check, so "None" licenses should not appear
        if license_type in licenses:
            # a known licence: take its attributes from the licences table
            by = licenses[license_type]['BY']
            nc = licenses[license_type]['NC']
            nd = licenses[license_type]['ND']
            sa = licenses[license_type]['SA']
            license_title = licenses[license_type]['title']
        elif form.license_checkbox.data:
            # an "other" licence with explicitly ticked attribute boxes
            by = True if 'BY' in form.license_checkbox.data else False
            nc = True if 'NC' in form.license_checkbox.data else False
            nd = True if 'ND' in form.license_checkbox.data else False
            sa = True if 'SA' in form.license_checkbox.data else False
            license_title = license_type
        else:
            # an "other" licence with no attribute information at all
            by = None; nc = None; nd = None; sa = None
            license_title = license_type

        bibjson.set_license(
            license_title,
            license_type,
            url=form.license_url.data,
            open_access=forms.interpret_special(form.open_access.data),
            by=by, nc=nc, nd=nd, sa=sa,
            embedded=forms.interpret_special(form.license_embedded.data),
            embedded_example_url=form.license_embedded_url.data
        )

    # checkboxes
    deposit_policies = forms.interpret_special(form.deposit_policy.data)  # need empty list if it's just "None"
    deposit_policies = forms.interpret_other(deposit_policies, form.deposit_policy_other.data)
    if deposit_policies:
        bibjson.deposit_policy = deposit_policies

    if form.copyright.data and form.copyright.data != 'None':
        holds_copyright = forms.interpret_other(
            forms.interpret_special(form.copyright.data),
            form.copyright_other.data
        )
        bibjson.set_author_copyright(form.copyright_url.data, holds_copyright=holds_copyright)

    if form.publishing_rights.data and form.publishing_rights.data != 'None':
        publishing_rights = forms.interpret_other(
            forms.interpret_special(form.publishing_rights.data),
            form.publishing_rights_other.data
        )
        bibjson.set_author_publishing_rights(form.publishing_rights_url.data, holds_rights=publishing_rights)

    # need to copy over the notes from the existing journal object, if any, otherwise
    # the dates on all the notes will get reset to right now (i.e. last_updated)
    # since the journal object we're creating in this xwalk is a new, empty one
    journal.set_notes(existing_journal.notes())

    # generate index of notes, just the text
    curnotes = []
    for curnote in journal.notes():
        curnotes.append(curnote['note'])

    # add any new notes
    formnotes = []
    for formnote in form.notes.data:
        if formnote['note']:
            if formnote['note'] not in curnotes and formnote["note"] != "":
                journal.add_note(formnote['note'])
            # also generate another text index of notes, this time an index of the form notes
            formnotes.append(formnote['note'])

    if current_user.has_role("delete_note"):
        # delete all notes not coming back from the form, means they've been deleted
        # also if one of the saved notes is completely blank, delete it
        for curnote in journal.notes()[:]:
            if not curnote['note'] or curnote['note'] not in formnotes:
                journal.remove_note(curnote)

    new_subjects = []
    for code in form.subject.data:
        sobj = {"scheme": 'LCC', "term": lcc.lookup_code(code), "code": code}
        new_subjects.append(sobj)
    bibjson.set_subjects(new_subjects)

    owner = form.owner.data.strip()
    if owner:
        journal.set_owner(owner)

    editor_group = form.editor_group.data.strip()
    if editor_group:
        journal.set_editor_group(editor_group)

    editor = form.editor.data.strip()
    if editor:
        journal.set_editor(editor)

    # old fields - only create them in the journal record if the values actually exist
    # need to use interpret_special in the test condition in case 'None' comes back from the form
    if getattr(form, 'author_pays', None):
        if forms.interpret_special(form.author_pays.data):
            bibjson.author_pays = form.author_pays.data
    if getattr(form, 'author_pays_url', None):
        if forms.interpret_special(form.author_pays_url.data):
            bibjson.author_pays_url = form.author_pays_url.data
    if getattr(form, 'oa_end_year', None):
        if forms.interpret_special(form.oa_end_year.data):
            bibjson.set_oa_end(form.oa_end_year.data)

    return journal
def test_01_discover_duplicates(self, name, kwargs):
    """Parameterised matrix test for the article duplicate-discovery service.

    ``kwargs`` selects one scenario: whether an owner/journal/articles exist,
    how the incoming article's DOI and fulltext URL relate to the indexed
    ones (exact, padded, prefixed, http/https variants, invalid), and what
    the expected outcome is (duplicates found by doi/fulltext, or a raise).
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    # map the scenario's exception name to the actual exception class (or None)
    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save()

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                # give the indexed fulltext URL the scheme variant under test
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save(blocking=True)
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        # NOTE(review): "=" instead of "-" looks like a typo, but the inline
        # comment below states ISSN matching is irrelevant here - confirm
        eissn = "1234=5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    # wait until all saved articles are queryable before executing
    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()

    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article)
    else:
        possible_articles = svc.discover_duplicates(article)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def form2obj(form, existing_journal):
    """Crosswalk an admin journal form into a new Journal object.

    :param form: the submitted WTForms journal form
    :param existing_journal: the journal being edited; its notes are carried
        over so their dates are not reset to "now"
    :return: a freshly constructed Journal populated from the form
    """
    journal = Journal()
    bibjson = journal.bibjson()

    # The if statements that wrap practically every field are there due to this
    # form being used to edit old journals which don't necessarily have most of
    # this info.
    # It also allows admins to delete the contents of any field if they wish,
    # by ticking the "Allow incomplete form" checkbox and deleting the contents
    # of that field. The if condition(s) will then *not* add the relevant field to the
    # new journal object being constructed.
    # add_url in the journal model has a safeguard against empty URL-s.

    if form.title.data:
        bibjson.title = form.title.data
    bibjson.add_url(form.url.data, urltype='homepage')
    if form.alternative_title.data:
        bibjson.alternative_title = form.alternative_title.data
    if form.pissn.data:
        bibjson.add_identifier(bibjson.P_ISSN, form.pissn.data)
    if form.eissn.data:
        bibjson.add_identifier(bibjson.E_ISSN, form.eissn.data)
    if form.publisher.data:
        bibjson.publisher = form.publisher.data
    if form.society_institution.data:
        bibjson.institution = form.society_institution.data
    if form.platform.data:
        bibjson.provider = form.platform.data
    if form.contact_name.data or form.contact_email.data:
        journal.add_contact(form.contact_name.data, form.contact_email.data)
    if form.country.data:
        bibjson.country = form.country.data

    # interpret_special maps the form's "True"/"False"/"None" strings to values
    if forms.interpret_special(form.processing_charges.data):
        bibjson.set_apc(form.processing_charges_currency.data, form.processing_charges_amount.data)

    if forms.interpret_special(form.submission_charges.data):
        bibjson.set_submission_charges(form.submission_charges_currency.data, form.submission_charges_amount.data)

    if forms.interpret_special(form.waiver_policy.data):
        bibjson.add_url(form.waiver_policy_url.data, 'waiver_policy')

    # checkboxes
    if forms.interpret_special(form.digital_archiving_policy.data) or form.digital_archiving_policy_url.data:
        archiving_policies = forms.interpret_special(form.digital_archiving_policy.data)
        # fold in the free-text "other" and "library" options, keeping their labels
        archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_other.data, store_other_label=True)
        archiving_policies = forms.interpret_other(archiving_policies, form.digital_archiving_policy_library.data, forms.digital_archiving_policy_specific_library_value, store_other_label=True)
        bibjson.set_archiving_policy(archiving_policies, form.digital_archiving_policy_url.data)

    if form.crawl_permission.data and form.crawl_permission.data != 'None':
        bibjson.allows_fulltext_indexing = forms.interpret_special(form.crawl_permission.data)  # just binary

    # checkboxes
    article_ids = forms.interpret_special(form.article_identifiers.data)
    article_ids = forms.interpret_other(article_ids, form.article_identifiers_other.data)
    if article_ids:
        bibjson.persistent_identifier_scheme = article_ids

    if (form.download_statistics.data and form.download_statistics.data != 'None') or form.download_statistics_url.data:
        bibjson.set_article_statistics(form.download_statistics_url.data, forms.interpret_special(form.download_statistics.data))

    if form.first_fulltext_oa_year.data:
        bibjson.set_oa_start(year=form.first_fulltext_oa_year.data)

    # checkboxes
    fulltext_format = forms.interpret_other(form.fulltext_format.data, form.fulltext_format_other.data)
    if fulltext_format:
        bibjson.format = fulltext_format

    if form.keywords.data:
        bibjson.set_keywords(form.keywords.data)  # tag list field

    if form.languages.data:
        bibjson.set_language(form.languages.data)  # select multiple field - gives a list back

    bibjson.add_url(form.editorial_board_url.data, urltype='editorial_board')

    if form.review_process.data or form.review_process_url.data:
        bibjson.set_editorial_review(form.review_process.data, form.review_process_url.data)

    bibjson.add_url(form.aims_scope_url.data, urltype='aims_scope')
    bibjson.add_url(form.instructions_authors_url.data, urltype='author_instructions')

    if (form.plagiarism_screening.data and form.plagiarism_screening.data != 'None') or form.plagiarism_screening_url.data:
        bibjson.set_plagiarism_detection(
            form.plagiarism_screening_url.data,
            has_detection=forms.interpret_special(form.plagiarism_screening.data))

    if form.publication_time.data:
        bibjson.publication_time = form.publication_time.data

    bibjson.add_url(form.oa_statement_url.data, urltype='oa_statement')

    license_type = forms.interpret_other(form.license.data, form.license_other.data)
    if forms.interpret_special(license_type):
        # "None" and "False" as strings like they come out of the WTForms processing)
        # would get interpreted correctly by this check, so "None" licenses should not appear
        if license_type in licenses:
            # a known licence: take its attributes from the licences table
            by = licenses[license_type]['BY']
            nc = licenses[license_type]['NC']
            nd = licenses[license_type]['ND']
            sa = licenses[license_type]['SA']
            license_title = licenses[license_type]['title']
        elif form.license_checkbox.data:
            # an "other" licence with explicitly ticked attribute boxes
            by = True if 'BY' in form.license_checkbox.data else False
            nc = True if 'NC' in form.license_checkbox.data else False
            nd = True if 'ND' in form.license_checkbox.data else False
            sa = True if 'SA' in form.license_checkbox.data else False
            license_title = license_type
        else:
            # an "other" licence with no attribute information at all
            by = None
            nc = None
            nd = None
            sa = None
            license_title = license_type

        bibjson.set_license(
            license_title,
            license_type,
            url=form.license_url.data,
            open_access=forms.interpret_special(form.open_access.data),
            by=by, nc=nc, nd=nd, sa=sa,
            embedded=forms.interpret_special(form.license_embedded.data),
            embedded_example_url=form.license_embedded_url.data)

    # checkboxes
    deposit_policies = forms.interpret_special(form.deposit_policy.data)  # need empty list if it's just "None"
    deposit_policies = forms.interpret_other(deposit_policies, form.deposit_policy_other.data)
    if deposit_policies:
        bibjson.deposit_policy = deposit_policies

    if form.copyright.data and form.copyright.data != 'None':
        holds_copyright = forms.interpret_other(
            forms.interpret_special(form.copyright.data),
            form.copyright_other.data)
        bibjson.set_author_copyright(form.copyright_url.data, holds_copyright=holds_copyright)

    if form.publishing_rights.data and form.publishing_rights.data != 'None':
        publishing_rights = forms.interpret_other(
            forms.interpret_special(form.publishing_rights.data),
            form.publishing_rights_other.data)
        bibjson.set_author_publishing_rights(form.publishing_rights_url.data, holds_rights=publishing_rights)

    # need to copy over the notes from the existing journal object, if any, otherwise
    # the dates on all the notes will get reset to right now (i.e. last_updated)
    # since the journal object we're creating in this xwalk is a new, empty one
    journal.set_notes(existing_journal.notes())

    # generate index of notes, just the text
    curnotes = []
    for curnote in journal.notes():
        curnotes.append(curnote['note'])

    # add any new notes
    formnotes = []
    for formnote in form.notes.data:
        if formnote['note']:
            if formnote['note'] not in curnotes and formnote["note"] != "":
                journal.add_note(formnote['note'])
            # also generate another text index of notes, this time an index of the form notes
            formnotes.append(formnote['note'])

    if current_user.has_role("delete_note"):
        # delete all notes not coming back from the form, means they've been deleted
        # also if one of the saved notes is completely blank, delete it
        for curnote in journal.notes()[:]:
            if not curnote['note'] or curnote['note'] not in formnotes:
                journal.remove_note(curnote)

    new_subjects = []
    for code in form.subject.data:
        sobj = {"scheme": 'LCC', "term": lcc.lookup_code(code), "code": code}
        new_subjects.append(sobj)
    bibjson.set_subjects(new_subjects)

    owner = form.owner.data.strip()
    if owner:
        journal.set_owner(owner)

    editor_group = form.editor_group.data.strip()
    if editor_group:
        journal.set_editor_group(editor_group)

    editor = form.editor.data.strip()
    if editor:
        journal.set_editor(editor)

    # old fields - only create them in the journal record if the values actually exist
    # need to use interpret_special in the test condition in case 'None' comes back from the form
    if getattr(form, 'author_pays', None):
        if forms.interpret_special(form.author_pays.data):
            bibjson.author_pays = form.author_pays.data
    if getattr(form, 'author_pays_url', None):
        if forms.interpret_special(form.author_pays_url.data):
            bibjson.author_pays_url = form.author_pays_url.data
    if getattr(form, 'oa_end_year', None):
        if forms.interpret_special(form.oa_end_year.data):
            bibjson.set_oa_end(form.oa_end_year.data)

    return journal
def test_01_create_article(self, name, kwargs):
    """Matrix-driven test of ArticleService.create_article.

    Each parameter arrives as a string flag in ``kwargs`` ("none"/"true"/
    "false"/"yes"/"exists"/"owner"/...), is decoded into the real argument,
    fixtures and mocks are installed on ``self.svc``, and the resulting
    report (or raised exception) is checked against the expected outcome.

    :param name: human-readable name of the matrix row (unused in the body)
    :param kwargs: dict of string-encoded test parameters
    """
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)

    # decode tri-state string flags: "none" -> None, otherwise a real bool
    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    # expected exception class, or None when no exception is expected
    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"

    # a journal with a known title must exist for the add_journal_info path
    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi="10.123/abc/1",
            fulltext="http://example.com/1")
        # journal info must not be pre-populated, the service fills it in
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)
        legit = True if account_arg == "owner" else False
        # replace service ownership checks with deterministic mocks
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

    # mock duplicate detection: either a synthetic duplicate or None
    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(
            eissn=eissn, pissn=pissn, doi="10.123/abc/1",
            fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    # capture what the mocked duplicate check will hand to the service
    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check,
                                    merge_duplicate, limit_to_account,
                                    add_journal_info, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check,
                                         merge_duplicate, limit_to_account,
                                         add_journal_info, dry_run)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            # original must NOT have been persisted
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"
class TestCreateOrUpdateArticle(DoajTestCase):
    """Integration tests for ArticleService.create_article in create-or-update mode.

    Fixture layout built by setUp:
      * journal1 — owned by ``self.publisher``, ISSNs 1111-1111 / 2222-2222,
        holding article10 and article11
      * journal2 — unowned, ISSNs 1234-5678 / 9876-5432, holding article2

    The tests then mutate article10's DOI / fulltext URL and check how
    create_article behaves for a publisher versus an admin (admins pass
    ``update_article_id`` to force an in-place update).
    """

    def setUp(self):
        """Create publisher/admin accounts, two journals and three articles."""
        super(TestCreateOrUpdateArticle, self).setUp()

        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)

        # journal1 belongs to the publisher account
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        # journal2 has no owner set
        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        # two articles in journal1, one in journal2
        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-10",
            fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-11",
            fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678", eissn="9876-5432", doi="10.0000/article-2",
            fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)

    def tearDown(self):
        super(TestCreateOrUpdateArticle, self).tearDown()

    def test_00_no_doi_and_url_changed(self):
        """Unchanged DOI + URL: resubmission is treated as an update for both roles."""
        ba = self.article10.bibjson()
        ba.title = "Updated Article"

        # try for admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        # FIX: message previously claimed "expected 1 updated" for the new-count
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated Article", \
            "Expected `Updated Article`, received: {}".format(self.article10.bibjson().title)

        ba.title = "Updated 2nd time"

        # try for publisher
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated 2nd time", \
            "Expected `Updated 2nd time`, received: {}".format(self.article10.bibjson().title)

    def test_01_new_doi_new_url(self):
        """Both DOI and URL changed: publisher gets a new article, admin an update."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.remove_urls(ba.FULLTEXT)
        ba.add_identifier(ba.DOI, "10.0000/NEW")
        # FIX: arguments were swapped (ba.add_url(ba.FULLTEXT, url)); the
        # signature is add_url(url, urltype) — see the other calls in this
        # class and the normalised-fulltext assertion in test_04
        ba.add_url("https://www.UPDATED.com", ba.FULLTEXT)

        # for publisher
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)
        assert resp["success"] == 1, "expected 1 new, received: {}".format(resp)
        assert resp["update"] == 0, "expected 0 updates, received: {}".format(resp)
        assert resp["new"] == 1, "expected 1 new, received: {}".format(resp)

        # for admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        # FIX: admin path performs an update; messages said "expected 1 new"
        assert resp["update"] == 1, "expected 1 update, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    def test_02_old_doi_existing_url_admin(self):
        """URL collides with another article's URL: merge conflict for both roles."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        # check for url from other article owned by the same publisher
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        # check for url from other article owned by someone else
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article2.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_03_existing_doi_old_url_admin(self):
        """DOI collides with another article's DOI: merge conflict for both roles."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by the same publisher
        ba.add_identifier(ba.DOI, "10.0000/article-11")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by someone else
        ba.add_identifier(ba.DOI, "10.0000/article-2")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_04_old_doi_new_url(self):
        """Same DOI, new URL: duplicate for publisher, update for admin."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        # FIX: message previously said "expected 1 new" while asserting 0
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.get_normalised_fulltext() == "//updated.com", \
            "expected //updated.com, received: {}".format(
                self.article10.get_normalised_fulltext())

    def test_05_new_doi_old_url(self):
        """New DOI, same URL: duplicate for publisher, update for admin."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        # FIX: failure message previously reported get_normalised_fulltext()
        # although the assertion is about the DOI
        assert self.article10.get_normalised_doi() == "10.0000/article-UPDATED", \
            "expected 10.0000/article-UPDATED, received: {}".format(
                self.article10.get_normalised_doi())

    def test_06_existing_doi_new_url(self):
        """DOI taken from another article + new URL: duplicate for both roles."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, self.article11.bibjson().get_one_identifier(ba.DOI))

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_07_new_doi_existing_url(self):
        """URL taken from another article + new DOI: duplicate for both roles."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)
def add_journal_metadata(self, j=None, reg=None):
    """
    Make sure the article is populated with all the relevant info from its
    owning parent journal (subjects, title, license, language, country,
    publisher, in_doaj status, seal and ISSNs).

    :param j: Pass in a Journal to bypass the (slow) locating step.
        MAKE SURE IT'S THE RIGHT ONE!
    :param reg: optional Journal used as a "register" — it receives a copy of
        every piece of journal data considered for the article, so the caller
        can see exactly and only which information was copied.  A fresh
        Journal is created when None is passed.
    :return: ``trip`` — True if any journal data actually changed the article
        (the "tripwire"), False if the article already matched the journal.
    :raises NoJournalException: when no owning journal can be located.
    """
    # Record the data that is copied into the article into the "reg"ister, in case the
    # caller needs to know exactly and only which information was copied
    if reg is None:
        reg = Journal()
    rbj = reg.bibjson()

    if j is None:
        journal = self.get_journal()
    else:
        journal = j

    # we were unable to find a journal
    if journal is None:
        raise NoJournalException("Unable to find a journal associated with this article")

    # if we get to here, we have a journal record we want to pull data from
    jbib = journal.bibjson()
    bibjson = self.bibjson()

    # tripwire to be tripped if the journal makes changes to the article
    trip = False

    if bibjson.subjects() != jbib.subjects():
        trip = True
        bibjson.set_subjects(jbib.subjects())
    # the register always records the journal's subjects, tripped or not
    rbj.set_subjects(jbib.subjects())

    if jbib.title is not None:
        if bibjson.journal_title != jbib.title:
            trip = True
            bibjson.journal_title = jbib.title
        rbj.title = jbib.title

    if jbib.get_license() is not None:
        lic = jbib.get_license()
        alic = bibjson.get_journal_license()

        # copy the license only if the article has none, or any field differs
        if lic is not None and (alic is None or (
                lic.get("title") != alic.get("title")
                or lic.get("type") != alic.get("type")
                or lic.get("url") != alic.get("url")
                or lic.get("version") != alic.get("version")
                or lic.get("open_access") != alic.get("open_access"))):
            bibjson.set_journal_license(lic.get("title"), lic.get("type"),
                                        lic.get("url"), lic.get("version"),
                                        lic.get("open_access"))
            trip = True
        rbj.set_license(lic.get("title"), lic.get("type"), lic.get("url"),
                        lic.get("version"), lic.get("open_access"))

    if len(jbib.language) > 0:
        # NOTE(review): .sort() here mutates the journal's and article's
        # language lists in place — presumably harmless since only the
        # order changes, but confirm no caller relies on original order
        jlang = jbib.language
        alang = bibjson.journal_language
        jlang.sort()
        alang.sort()
        if jlang != alang:
            bibjson.journal_language = jbib.language
            trip = True
        rbj.set_language(jbib.language)

    if jbib.country is not None:
        if jbib.country != bibjson.journal_country:
            bibjson.journal_country = jbib.country
            trip = True
        rbj.country = jbib.country

    if jbib.publisher:
        if jbib.publisher != bibjson.publisher:
            bibjson.publisher = jbib.publisher
            trip = True
        rbj.publisher = jbib.publisher

    # Copy the seal info, in_doaj status and the journal's ISSNs
    if journal.is_in_doaj() != self.is_in_doaj():
        self.set_in_doaj(journal.is_in_doaj())
        trip = True
    reg.set_in_doaj(journal.is_in_doaj())

    if journal.has_seal() != self.has_seal():
        self.set_seal(journal.has_seal())
        trip = True
    reg.set_seal(journal.has_seal())

    try:
        aissns = bibjson.journal_issns
        jissns = jbib.issns()
        aissns.sort()
        jissns.sort()
        if aissns != jissns:
            bibjson.journal_issns = jbib.issns()
            trip = True

        # the register records at most one E-ISSN and one P-ISSN
        eissns = jbib.get_identifiers(jbib.E_ISSN)
        pissns = jbib.get_identifiers(jbib.P_ISSN)
        if eissns is not None and len(eissns) > 0:
            rbj.add_identifier(rbj.E_ISSN, eissns[0])
        if pissns is not None and len(pissns) > 0:
            rbj.add_identifier(rbj.P_ISSN, pissns[0])
    except KeyError:
        # No issns, don't worry about it for now
        pass

    return trip
def test_01_create_article(self, value, kwargs):
    """Matrix-driven test of ArticleService.create_article with the
    update-article (create-or-update) behaviour.

    Decodes string flags from ``kwargs``, builds fixture articles (the
    uploaded article, an optional pre-existing "original" and an optional
    duplicate), installs mocks for ownership / duplicate / permission /
    prepare-update checks on ``self.svc``, then verifies the returned report
    counters and which articles ended up persisted.

    :param value: human-readable name of the matrix row (unused in the body)
    :param kwargs: dict of string-encoded test parameters
    """
    article_arg = kwargs.get("article")
    account_arg = kwargs.get("account")
    get_duplicate_result_arg = kwargs.get("get_duplicate_result")
    role_arg = kwargs.get("role")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    update_article_id_arg = kwargs.get("update_article_id")
    has_ft_doi_changed_arg = kwargs.get("has_ft_doi_changed_arg")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)
    has_ft_doi_changed = True if has_ft_doi_changed_arg == "yes" else False

    # decode tri-state string flags: "none" -> None, otherwise a real bool
    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"
    doi = "10.123/abc/1"
    fulltext = "http://example.com/1"
    another_doi = "10.123/duplicate-1"
    another_eissn = "1111-1111"
    another_pissn = "2222-2222"

    duplicate_id = None
    original_id = None
    # NOTE(review): update_article_id is initialised here and never assigned
    # a non-None value below, so the `update_article_id is not None` branch
    # in the assertions is unreachable — confirm against the matrix intent
    update_article_id = None

    # a journal with a known title must exist for the add_journal_info path
    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    # a distinct article sharing the incoming article's DOI/fulltext
    if get_duplicate_result_arg == 'different':
        source = ArticleFixtureFactory.make_article_source(
            eissn=another_eissn, pissn=another_pissn, doi=doi,
            fulltext=fulltext)
        del source["bibjson"]["journal"]
        duplicate = Article(**source)
        duplicate.save()
        duplicate_id = duplicate.id

    article_id_to_upload = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        article_id_to_upload = article.id

    # the "duplicate" is the uploaded article itself (same id)
    if get_duplicate_result_arg == "itself":
        source = ArticleFixtureFactory.make_article_source(
            eissn=another_eissn, pissn=another_pissn, doi=doi,
            fulltext=fulltext)
        del source["bibjson"]["journal"]
        duplicate = Article(**source)
        duplicate.set_id(article_id_to_upload)
        duplicate.save()
        duplicate_id = duplicate.id

    # a pre-existing "original" article that the upload should update
    if update_article_id_arg != "none":
        another_source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        original = Article(**another_source)
        original.save(blocking=True)
        original_id = original.id

        if update_article_id_arg == "doi_ft_not_changed":
            article.bibjson().title = "This needs to be updated"
        elif update_article_id_arg == "doi_ft_changed_duplicate":
            article.bibjson().remove_identifiers("doi")
            article.bibjson().add_identifier("doi", another_doi)
        elif update_article_id_arg == "doi_ft_changed_ok":
            article.bibjson().remove_identifiers("doi")
            article.bibjson().add_identifier("doi", "10.1234/updated")
    else:
        update_article_id = None

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)
        legit = True if account_arg == "owner" else False
        # replace service ownership checks with deterministic mocks
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

    if role_arg == "admin":
        account.set_role("admin")
        account.save()

    # mock duplicate detection according to the matrix row
    if get_duplicate_result_arg == "none":
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    elif get_duplicate_result_arg == "itself":
        gd_mock = BLLArticleMockFactory.get_duplicate(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext,
            given_article_id=original_id)
    elif get_duplicate_result_arg == "different":
        gd_mock = BLLArticleMockFactory.get_duplicate(
            eissn=another_eissn, pissn=another_pissn, doi=doi,
            fulltext=fulltext, given_article_id=duplicate_id)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(
            given_article_id="exception")
    self.svc.get_duplicate = gd_mock

    # capture what the mocked duplicate check will hand to the service
    mock_article = self.svc.get_duplicate(article)

    # permissions: granted to admins and owning publishers only
    if role_arg == "admin" or (role_arg == "publisher"
                               and account_arg == "owner"):
        has_permissions_mock = BLLArticleMockFactory.has_permissions(True)
    else:
        has_permissions_mock = BLLArticleMockFactory.has_permissions(False)
    self.svc.has_permissions = has_permissions_mock

    # mock the internal prepare-update hooks for both roles
    prepare_update_admin_mock = BLLArticleMockFactory._prepare_update_admin(
        get_duplicate_result_arg, update_article_id_arg)
    self.svc._prepare_update_admin = prepare_update_admin_mock

    prepare_update_publisher_mock = BLLArticleMockFactory._prepare_update_publisher(
        get_duplicate_result_arg, has_ft_doi_changed)
    self.svc._prepare_update_publisher = prepare_update_publisher_mock

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account,
                                    merge_duplicate=merge_duplicate,
                                    add_journal_info=add_journal_info,
                                    dry_run=dry_run,
                                    update_article_id=original_id)
    else:
        report = self.svc.create_article(article, account,
                                         merge_duplicate=merge_duplicate,
                                         add_journal_info=add_journal_info,
                                         dry_run=dry_run,
                                         update_article_id=original_id)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes" and update_article_id is not None:
            # NOTE(review): dead branch — update_article_id is always None here
            if get_duplicate_result_arg == "itself":
                original = Article.pull(update_article_id)
                assert original is not None
                assert report["update"] == 1, "update: {}".format(report["update"])
                assert report["new"] == 0, "update: {}".format(report["new"])
        elif original_saved_arg == "yes":
            if get_duplicate_result_arg == "itself":
                new = Article.pull(article_id_to_upload)
                assert new is not None
                assert report["update"] == 1, "update: {}".format(report["update"])
                assert report["new"] == 0, "update: {}".format(report["new"])
            elif get_duplicate_result_arg == "none":
                new = Article.pull(article_id_to_upload)
                assert new is not None
                assert report["update"] == 0, "update: {}".format(report["update"])
                assert report["new"] == 1, "update: {}".format(report["new"])

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None and mock_article.id != original_id:
            merged = Article.pull(mock_article.id)
            assert merged is None, "merged: {}".format(merged)

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"

        # the pre-existing original must survive in both update scenarios
        if update_article_id_arg == "doi_ft_changed_ok":
            original = Article.pull(original_id)
            assert original is not None
        elif update_article_id_arg == "doi_ft_not_changed":
            original = Article.pull(original_id)
            assert original is not None
def test_02_application_2_journal(self, name, application_type,
                                  manual_update_arg, app_key_properties,
                                  current_journal, raises):
    """Matrix-driven test of ApplicationService.application_2_journal.

    Builds an application (optionally linked to a present/missing current
    journal), converts it to a journal, and checks which "key properties"
    (contacts, editor group, editor, owner, seal, notes) the resulting
    journal carries — the application's, the prior journal's, or none.

    :param name: matrix row name (unused in the body)
    :param application_type: "present" to build an application fixture
    :param manual_update_arg: "true"/"false"/other -> True/False/None
    :param app_key_properties: "yes" when the application carries key properties
    :param current_journal: "present", "missing" or "none"
    :param raises: key into EXCEPTIONS, or empty when no error is expected
    """
    # set up for the test
    #########################################

    cj = None
    # randomised so both seal states get exercised across runs
    has_seal = bool(randint(0, 1))

    application = None
    if application_type == "present":
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())
        # strip everything the fixture pre-populates so the matrix controls it
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")
            application.set_seal(has_seal)
            application.add_note("Application Note")

        if current_journal == "present":
            journal = Journal(**JournalFixtureFactory.make_journal_source())
            journal.remove_contacts()
            journal.add_contact("Journal", "*****@*****.**")
            journal.set_editor_group("journaleditorgroup")
            journal.set_editor("journaleditor")
            journal.set_owner("journalowner")
            journal.remove_current_application()
            journal.remove_notes()
            journal.add_note("Journal Note")
            journal.save(blocking=True)
            application.set_current_journal(journal.id)
            cj = journal
        elif current_journal == "missing":
            # points at a journal id that does not exist in the index
            application.set_current_journal("123456789987654321")

    manual_update = None
    if manual_update_arg == "true":
        manual_update = True
    elif manual_update_arg == "false":
        manual_update = False

    # execute the test
    ########################################

    svc = DOAJ.applicationService()
    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
    else:
        journal = svc.application_2_journal(application, manual_update)

        # check the result
        ######################################

        assert journal is not None
        assert isinstance(journal, Journal)
        assert journal.is_in_doaj() is True

        # bibjson must be copied verbatim, apart from the "active" flag
        jbj = journal.bibjson().data
        del jbj["active"]
        assert jbj == application.bibjson().data

        if current_journal == "present":
            assert len(journal.related_applications) == 3
        else:
            assert len(journal.related_applications) == 1
        related = journal.related_application_record(application.id)
        assert related is not None

        if manual_update_arg == "true":
            # epoch timestamp is the "never manually updated" sentinel
            assert journal.last_manual_update is not None and \
                journal.last_manual_update != "1970-01-01T00:00:00Z"

        if app_key_properties == "yes":
            # key properties come from the application
            contacts = journal.contacts()
            assert len(contacts) == 1
            assert contacts[0].get("name") == "Application"
            assert contacts[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "appeditorgroup"
            assert journal.editor == "appeditor"
            assert journal.owner == "appowner"
            assert journal.has_seal() == has_seal
            if current_journal == "present":
                assert len(journal.notes) == 2
            else:
                assert len(journal.notes) == 1
        elif app_key_properties == "no":
            if current_journal == "present":
                # key properties fall back to the prior journal's values
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Journal"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "journaleditorgroup"
                assert journal.editor == "journaleditor"
                assert journal.owner == "journalowner"
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 2
            elif current_journal == "none" or current_journal == "missing":
                # no source for key properties: all unset
                contacts = journal.contacts()
                assert len(contacts) == 0
                assert journal.editor_group is None
                assert journal.editor is None
                assert journal.owner is None
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 1

        if current_journal == "present":
            # journal identity and creation date survive the conversion
            assert cj.id == journal.id
            assert cj.created_date == journal.created_date
def test_01_discover_duplicates(self, name, kwargs):
    """Matrix-driven test of ArticleService.discover_duplicates.

    Seeds the index with articles built from the IDENTS table (with their
    DOIs/fulltexts optionally padded or prefixed per the matrix row), then
    builds an incoming article and checks that duplicates are reported under
    the "doi" and/or "fulltext" keys as expected.

    :param name: matrix row name (unused in the body)
    :param kwargs: dict of string-encoded test parameters
    """
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save(blocking=True)

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            # optionally distort the stored DOI so normalisation is exercised
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            # optionally distort the stored fulltext URL scheme/padding
            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
            else:
                the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(
                eissn="1234-5678", pissn="9876-5432", doi=the_doi,
                fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save()
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234=5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    # wait for the seeded articles to be indexed
    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()

    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article, owner_id)
    else:
        possible_articles = svc.discover_duplicates(article, owner_id)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def test_01_create_article(self, name, kwargs):
    """Matrix-driven test of ArticleService.create_article.

    Each kwarg is a string switch from the parameter matrix ("none"/"true"/
    "false"/"yes"/"exists"/"owner"/"not_owner"/...) describing one scenario:
    which arguments to pass, which mocks to install, and what outcome
    (success count, exception, saved/merged records) to assert.
    """
    # unpack the matrix row
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    # expected value of report["success"] after execution
    success = int(success_arg)

    # each tri-state string arg maps to None (omit) / True / False
    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    # EXCEPTIONS maps the matrix string to an exception class (or None)
    raises = EXCEPTIONS.get(raises_arg)

    # ISSNs shared between the journal fixture and the article fixture so
    # journal-info enrichment can match them up
    eissn = "1234-5678"
    pissn = "9876-5432"

    if add_journal_info:
        # create a real journal record whose title should be copied onto
        # the article when add_journal_info is requested
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
        # strip the journal metadata so add_journal_info has something to do
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        # remember the incoming article's id so we can check later whether
        # it was saved or discarded in favour of a merge target
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)
        # install ownership mocks on the service: "owner" means the account
        # legitimately owns both ISSNs, "not_owner" splits them between
        # unowned and unmatched
        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

    # mock duplicate detection: either return a matching article or None
    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    # capture what the mock will hand back during create_article, so the
    # merge assertions below can reference the duplicate's id
    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info, dry_run)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            # the incoming article should NOT exist under its own id
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            # the duplicate's record should have been updated in place
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None

        if add_journal_info:
            # journal metadata should have been copied from the fixture journal
            assert article.bibjson().journal_title == "Add Journal Info Title"
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    """Scroll every journal in the index and write three CSV reports.

    NOTE: Python 2 code (statement-form ``print``).

    :param outfile_applications: path for the report of applications whose
        latest edit predates the journal's (re)creation by more than THRESHOLD
    :param outfile_accounts: path for the report of journals with a missing
        or unresolvable owner account
    :param outfile_reapps: path for the report of potentially broken
        reapplications (journal created long before its latest application)
    :param conn: esprit connection to scroll the "journal" type with
    """
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:

        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()

            # pick the most recently updated application sharing an ISSN
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                # journal has no related application at all - nothing to report
                continue

            # use the reapplication date instead of the created date when the
            # journal has been through an update request
            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            # normalise both sides to comparable timezones before subtracting
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)

            # positive diff => journal (re)created AFTER the application's last touch
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                # look up the most recent "edit" provenance record for this application
                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                # and the most recent "status:accepted" provenance record
                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    # owner id recorded on the journal but no such account exists
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), owner])

    print "processed", counter, "journals"
def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
    """Matrix-driven test of ApplicationService.application_2_journal.

    Scenario switches:
      * application_type - "present" builds a Suggestion fixture, otherwise None
      * manual_update_arg - "true"/"false"/other -> True/False/None passed through
      * app_key_properties - "yes" puts contact/editor/owner/seal/note on the
        application; "no" leaves them off so any existing journal values win
      * current_journal - "present" (real saved journal), "missing" (dangling
        id), or "none"
      * raises - key into EXCEPTIONS for an expected failure, or empty
    """
    # set up for the test
    #########################################

    cj = None
    # randomise the seal so both True and False get exercised across runs
    has_seal = bool(randint(0, 1))
    application = None
    if application_type == "present":
        # start from a clean application: strip everything the test wants to
        # control explicitly
        application = Suggestion(**ApplicationFixtureFactory.make_application_source())
        application.set_id(application.makeid())
        application.remove_contacts()
        application.remove_editor_group()
        application.remove_editor()
        application.remove_owner()
        application.remove_current_journal()
        application.remove_notes()

        if app_key_properties == "yes":
            # application-side values that should take precedence on the journal
            application.add_contact("Application", "*****@*****.**")
            application.set_editor_group("appeditorgroup")
            application.set_editor("appeditor")
            application.set_owner("appowner")
            application.set_seal(has_seal)
            application.add_note("Application Note")

        if current_journal == "present":
            # journal-side values that should survive when the application
            # does not carry its own
            journal = Journal(**JournalFixtureFactory.make_journal_source())
            journal.remove_contacts()
            journal.add_contact("Journal", "*****@*****.**")
            journal.set_editor_group("journaleditorgroup")
            journal.set_editor("journaleditor")
            journal.set_owner("journalowner")
            journal.remove_current_application()
            journal.remove_notes()
            journal.add_note("Journal Note")
            journal.save(blocking=True)
            application.set_current_journal(journal.id)
            cj = journal
        elif current_journal == "missing":
            # point at a journal id that does not exist in the index
            application.set_current_journal("123456789987654321")

    manual_update = None
    if manual_update_arg == "true":
        manual_update = True
    elif manual_update_arg == "false":
        manual_update = False

    # execute the test
    ########################################

    svc = DOAJ.applicationService()
    if raises is not None and raises != "":
        with self.assertRaises(EXCEPTIONS[raises]):
            svc.application_2_journal(application, manual_update)
    else:
        journal = svc.application_2_journal(application, manual_update)

        # check the result
        ######################################

        assert journal is not None
        assert isinstance(journal, Journal)
        assert journal.is_in_doaj() is True

        # the journal bibjson should mirror the application's, modulo the
        # "active" flag which only journals carry
        jbj = journal.bibjson().data
        del jbj["active"]
        assert jbj == application.bibjson().data

        # the fixture journal already carries related applications; converting
        # adds one more, while a fresh journal gets exactly one
        if current_journal == "present":
            assert len(journal.related_applications) == 3
        else:
            assert len(journal.related_applications) == 1
        related = journal.related_application_record(application.id)
        assert related is not None

        if manual_update_arg == "true":
            # a manual update must stamp a real (non-epoch) timestamp
            assert journal.last_manual_update is not None and journal.last_manual_update != "1970-01-01T00:00:00Z"

        if app_key_properties == "yes":
            # application-supplied key properties override the journal's
            contacts = journal.contacts()
            assert len(contacts) == 1
            assert contacts[0].get("name") == "Application"
            assert contacts[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "appeditorgroup"
            assert journal.editor == "appeditor"
            assert journal.owner == "appowner"
            assert journal.has_seal() == has_seal
            # notes are merged: journal note + application note when both exist
            if current_journal == "present":
                assert len(journal.notes) == 2
            else:
                assert len(journal.notes) == 1
        elif app_key_properties == "no":
            if current_journal == "present":
                # without application values, the journal's own are retained
                contacts = journal.contacts()
                assert len(contacts) == 1
                assert contacts[0].get("name") == "Journal"
                assert contacts[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "journaleditorgroup"
                assert journal.editor == "journaleditor"
                assert journal.owner == "journalowner"
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 2
            elif current_journal == "none" or current_journal == "missing":
                # neither side supplied key properties - everything is unset
                contacts = journal.contacts()
                assert len(contacts) == 0
                assert journal.editor_group is None
                assert journal.editor is None
                assert journal.owner is None
                assert journal.has_seal() == has_seal
                assert len(journal.notes) == 1

        if current_journal == "present":
            # conversion updates the existing journal in place: same id,
            # original created date preserved
            assert cj.id == journal.id
            assert cj.created_date == journal.created_date